From dd8151d2ad9265a349d16fa0fc7634386614ca17 Mon Sep 17 00:00:00 2001 From: fkaufman-asym Date: Thu, 2 Sep 2021 14:12:33 -0400 Subject: [PATCH 01/18] added integration_endpoint --- src/main/resources/api/service_openapi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index 6692ea20..cb36bfd8 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -9,6 +9,7 @@ info: servers: - url: https://cda.cda-dev.broadinstitute.org + - url: https://integration.dev.broadinstitute.org - url: http://localhost:8080 tags: From e08ea0d4482c716fd66f0a255111f9b669fc8c94 Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Thu, 23 Sep 2021 15:38:25 -0400 Subject: [PATCH 02/18] mergeConflicts (#108) --- build.gradle | 2 +- src/main/resources/api/service_openapi.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build.gradle b/build.gradle index 0319ab11..c57af85d 100644 --- a/build.gradle +++ b/build.gradle @@ -17,7 +17,7 @@ plugins { } group = 'bio.terra.cda' -version = '0.0.19' +version = '0.0.22.3-SNAPSHOT' sourceCompatibility = JavaVersion.VERSION_11 repositories { diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index cb36bfd8..a98bd039 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -1,7 +1,7 @@ openapi: 3.0.3 info: description: API definition for the CDA - version: 1.1.2 + version: 1.1.12 title: CDA API license: name: Apache 2.0 @@ -9,8 +9,8 @@ info: servers: - url: https://cda.cda-dev.broadinstitute.org - - url: https://integration.dev.broadinstitute.org - url: http://localhost:8080 + - url: http://34.71.0.127:8080 tags: - name: query @@ -424,4 +424,4 @@ components: content: application/json: schema: - $ref: 
'#/components/schemas/JobStatusData' \ No newline at end of file + $ref: '#/components/schemas/JobStatusData' From 631c59a11b66e94eea14bf0da58d213c21ab724d Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Wed, 20 Oct 2021 09:27:55 -0400 Subject: [PATCH 03/18] Issue 109 columns ap iendpoint (#111) * create a columns API endpoint * does not work yet * bump version number * bump version * remove unit test from this branch --- build.gradle | 2 +- .../cda/app/controller/QueryApiController.java | 14 ++++++++++++++ src/main/resources/api/service_openapi.yaml | 18 +++++++++++++++++- .../app/controller/QueryApiControllerTest.java | 2 ++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index c57af85d..65c8bab6 100644 --- a/build.gradle +++ b/build.gradle @@ -17,7 +17,7 @@ plugins { } group = 'bio.terra.cda' -version = '0.0.22.3-SNAPSHOT' +version = '0.0.22.5-SNAPSHOT' sourceCompatibility = JavaVersion.VERSION_11 repositories { diff --git a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java index 20f568cf..3db352b2 100644 --- a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java +++ b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java @@ -123,4 +123,18 @@ public ResponseEntity uniqueValues(String version, String body return sendQuery(querySql, false); } + + @Override + public ResponseEntity columns(String version, String table) { + table = applicationConfiguration.getBqTable(); + String querySql = + "SELECT field_path FROM " + + table + + ".INFORMATION_SCHEMA.COLUMN_FIELD_PATHS WHERE table_name = '" + + version + + "'"; + logger.debug("columns: " + querySql); + + return sendQuery(querySql, false); + } } diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index a98bd039..9ef55c6b 100644 --- 
a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -195,6 +195,22 @@ paths: 200: $ref: '#/components/responses/QueryCreated' + /api/v1/columns/{version}: + get: + summary: Returns all column names + description: Return columnNames for schema + operationId: columns + tags: + - query + + parameters: + - $ref: '#/components/parameters/DatasetVersion' + - $ref: '#/components/parameters/Table' + + responses: + 200: + $ref: '#/components/responses/QueryCreated' + components: parameters: @@ -239,7 +255,7 @@ components: schema: type: string default: gdc-bq-sample.cda_mvp - description: table name + description: tablename schemas: diff --git a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java index 2804e5f2..b15519f2 100644 --- a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java +++ b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java @@ -7,6 +7,7 @@ import static org.mockito.Mockito.only; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.verify; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; @@ -79,4 +80,5 @@ public void uniqueValuesTest() throws Exception { objectMapper.readValue(result.getResponse().getContentAsString(), QueryCreatedData.class); assertThat(response.getQuerySql(), equalTo(expected)); } + } From 47b25e17602f1b98f46e7d5a2179d0464137f345 Mon Sep 17 00:00:00 2001 From: Dion Boles <82461519+dionboles-asym@users.noreply.github.com> Date: Wed, 20 Oct 2021 09:30:18 -0400 Subject: [PATCH 04/18] Update unique terms bug (#112) * Updated the uniqueValues to add table * Added a Table var to uniqueValuesTest * Added a Table var to uniqueValuesTest --- 
.../terra/cda/app/controller/QueryApiController.java | 10 ++++++++-- src/main/resources/api/service_openapi.yaml | 12 ++++++++++-- .../cda/app/controller/QueryApiControllerTest.java | 2 ++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java index 3db352b2..94d175b6 100644 --- a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java +++ b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java @@ -99,8 +99,14 @@ public ResponseEntity booleanQuery( } @Override - public ResponseEntity uniqueValues(String version, String body, String system) { - String table = applicationConfiguration.getBqTable() + "." + version; + public ResponseEntity uniqueValues(String version, String body, String system, String tableName) { + String table; + if(tableName == null){ + table = applicationConfiguration.getBqTable() + "." + version; + }else { + table = tableName + "." 
+ version; + } + NestedColumn nt = NestedColumn.generate(body); Set unnestClauses = nt.getUnnestClauses(); final String whereClause; diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index 9ef55c6b..e28f6b0a 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -182,6 +182,7 @@ paths: parameters: - $ref: '#/components/parameters/DatasetVersion' - $ref: '#/components/parameters/SystemValue' + - $ref: '#/components/parameters/AddTable' requestBody: description: column_name of table value being requested @@ -228,6 +229,13 @@ components: schema: type: string description: Filter on system for results + AddTable: + in: query + name: table name + schema: + type: string + default: gdc-bq-sample.cda_mvp + description: Filter on system for results ResultOffset: in: query name: offset @@ -396,7 +404,7 @@ components: properties: node_type: type: string - enum: [column, quoted, unquoted, ">=", "<=", "<", ">", "=", "!=", AND, OR, NOT, SUBQUERY] + enum: [column, quoted, unquoted, ">=", "<=", "<", ">", "=", "!=", AND, OR, NOT, SUBQUERY,WHERE] description: > Query contents and behavior depends on `node_type`: * `column` - column name is in `value` @@ -436,7 +444,7 @@ components: $ref: '#/components/schemas/QueryResponseData' JobStatusResponse: - description: + description: Get's Bigquery job id status content: application/json: schema: diff --git a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java index b15519f2..37aa4f9f 100644 --- a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java +++ b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java @@ -66,12 +66,14 @@ public void uniqueValuesTest() throws Exception { String version = "v3"; String system = "GDC"; String body = "sex"; + String tableName = "TABLE"; var expected = "SELECT DISTINCT sex 
FROM TABLE.v3, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE _identifier.system = 'GDC'"; var result = mvc.perform( post("/api/v1/unique-values/{version}", version) .param("system", system) + .param("table name",tableName) .contentType(MediaType.valueOf("text/plain")) .content(body) .accept(MediaType.APPLICATION_JSON)) From 3ff01b13ae29c999abd3363a72ed7da1d843823c Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Wed, 20 Oct 2021 10:37:20 -0400 Subject: [PATCH 05/18] Add null check to columns API (#113) * Add null check to columns API * fix parameters in Table --- .../app/controller/QueryApiController.java | 24 ++++++++++++------- src/main/resources/api/service_openapi.yaml | 2 +- .../controller/QueryApiControllerTest.java | 6 ++--- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java index 94d175b6..ba064fcc 100644 --- a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java +++ b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java @@ -99,12 +99,13 @@ public ResponseEntity booleanQuery( } @Override - public ResponseEntity uniqueValues(String version, String body, String system, String tableName) { - String table; - if(tableName == null){ - table = applicationConfiguration.getBqTable() + "." + version; - }else { - table = tableName + "." + version; + public ResponseEntity uniqueValues( + String version, String body, String system, String table) { + String tableName; + if (table == null) { + tableName = applicationConfiguration.getBqTable() + "." + version; + } else { + tableName = table + "." 
+ version; } NestedColumn nt = NestedColumn.generate(body); @@ -124,7 +125,7 @@ public ResponseEntity uniqueValues(String version, String body unnestClauses.stream().forEach((k) -> unnestConcat.append(k)); String querySql = - "SELECT DISTINCT " + nt.getColumn() + " FROM " + table + unnestConcat + whereClause; + "SELECT DISTINCT " + nt.getColumn() + " FROM " + tableName + unnestConcat + whereClause; logger.debug("uniqueValues: " + querySql); return sendQuery(querySql, false); @@ -132,10 +133,15 @@ public ResponseEntity uniqueValues(String version, String body @Override public ResponseEntity columns(String version, String table) { - table = applicationConfiguration.getBqTable(); + String tableName; + if (table == null) { + tableName = applicationConfiguration.getBqTable(); + } else { + tableName = table; + } String querySql = "SELECT field_path FROM " - + table + + tableName + ".INFORMATION_SCHEMA.COLUMN_FIELD_PATHS WHERE table_name = '" + version + "'"; diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index e28f6b0a..e9cb2655 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -182,7 +182,7 @@ paths: parameters: - $ref: '#/components/parameters/DatasetVersion' - $ref: '#/components/parameters/SystemValue' - - $ref: '#/components/parameters/AddTable' + - $ref: '#/components/parameters/Table' requestBody: description: column_name of table value being requested diff --git a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java index 37aa4f9f..2a0fc006 100644 --- a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java +++ b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java @@ -7,7 +7,6 @@ import static org.mockito.Mockito.only; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.verify; -import static 
org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; @@ -66,14 +65,14 @@ public void uniqueValuesTest() throws Exception { String version = "v3"; String system = "GDC"; String body = "sex"; - String tableName = "TABLE"; + String table = "TABLE"; var expected = "SELECT DISTINCT sex FROM TABLE.v3, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE _identifier.system = 'GDC'"; var result = mvc.perform( post("/api/v1/unique-values/{version}", version) .param("system", system) - .param("table name",tableName) + .param("table", table) .contentType(MediaType.valueOf("text/plain")) .content(body) .accept(MediaType.APPLICATION_JSON)) @@ -82,5 +81,4 @@ public void uniqueValuesTest() throws Exception { objectMapper.readValue(result.getResponse().getContentAsString(), QueryCreatedData.class); assertThat(response.getQuerySql(), equalTo(expected)); } - } From b32a295eab684c1fab5ab972a383ed164b7f5093 Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Wed, 20 Oct 2021 11:29:35 -0400 Subject: [PATCH 06/18] update schema and table for openAPI (#114) --- src/main/resources/api/service_openapi.yaml | 8 ++++---- src/main/resources/application.properties | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index e9cb2655..66e879de 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -1,7 +1,7 @@ openapi: 3.0.3 info: description: API definition for the CDA - version: 1.1.12 + version: 2.0.0 title: CDA API license: name: Apache 2.0 @@ -221,7 +221,7 @@ components: required: true schema: type: string - default: v3 + default: all_v2 
description: Dataset version SystemValue: in: query @@ -234,7 +234,7 @@ components: name: table name schema: type: string - default: gdc-bq-sample.cda_mvp + default: gdc-bq-sample.integration description: Filter on system for results ResultOffset: in: query @@ -262,7 +262,7 @@ components: name: table schema: type: string - default: gdc-bq-sample.cda_mvp + default: gdc-bq-sample.integration description: tablename diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index ddf6cd65..9ea0ee6a 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,3 +1,3 @@ server.port=8080 -cda.bqTable=gdc-bq-sample.cda_mvp -cda.datasetVersion=v3 \ No newline at end of file +cda.bqTable=gdc-bq-sample.integration +cda.datasetVersion=all_v2 \ No newline at end of file From 802c2d83e16a7a9e7597298c3f9a461fd17ba1fc Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Wed, 20 Oct 2021 11:52:42 -0400 Subject: [PATCH 07/18] Integration tests (#115) * Merged idc tables (#101) * Point to merged IDC view * update tablename * Tag as version 0.0.20 (#102) * Updated the uniqueValues to add table (#104) * Updated the uniqueValues to add table * Added a Table var to uniqueValuesTest * fixup of version number Co-authored-by: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Co-authored-by: fkaufman-asym * tag version 0.0.21 (#105) * bump tag version v.0.0.22 (#107) * Integration Tests Co-authored-by: Dion Boles <82461519+dionboles-asym@users.noreply.github.com> --- build.gradle | 32 ++- .../bio/terra/cda/app/model/QueryResult.java | 63 +++++ .../bio/terra/cda/app/model/SchemaObject.java | 94 +++++++ .../terra/cda/app/model/SchemaObjectList.java | 58 +++++ .../terra/cda/app/model/StatusMessage.java | 3 + .../app/service/EndToEndIntegrationTests.java | 240 ++++++++++++++++++ src/main/resources/api/service_openapi.yaml | 2 +- 
src/main/resources/application.properties | 2 +- .../controller/QueryApiControllerTest.java | 1 + 9 files changed, 492 insertions(+), 3 deletions(-) create mode 100644 src/integrationTest/java/bio/terra/cda/app/model/QueryResult.java create mode 100644 src/integrationTest/java/bio/terra/cda/app/model/SchemaObject.java create mode 100644 src/integrationTest/java/bio/terra/cda/app/model/SchemaObjectList.java create mode 100644 src/integrationTest/java/bio/terra/cda/app/model/StatusMessage.java create mode 100644 src/integrationTest/java/bio/terra/cda/app/service/EndToEndIntegrationTests.java diff --git a/build.gradle b/build.gradle index 65c8bab6..9321160e 100644 --- a/build.gradle +++ b/build.gradle @@ -23,6 +23,17 @@ sourceCompatibility = JavaVersion.VERSION_11 repositories { mavenCentral() } +sourceSets { + integrationTest { + compileClasspath += sourceSets.main.output + runtimeClasspath += sourceSets.main.output + } +} + +configurations { + integrationTestImplementation.extendsFrom implementation + integrationTestRuntimeOnly.extendsFrom runtimeOnly +} dependencies { implementation group: 'org.springframework.boot', name: 'spring-boot-starter-data-jdbc' @@ -40,6 +51,11 @@ dependencies { // -- OpenAPI CodeGen dependencies -- implementation group: 'io.swagger.core.v3', name: 'swagger-annotations', version: '2.1.7' implementation group: 'io.springfox', name: 'springfox-swagger2', version: '3.0.0' + implementation 'org.junit.jupiter:junit-jupiter:5.7.0' + implementation 'org.junit.jupiter:junit-jupiter:5.7.0' + implementation 'org.junit.jupiter:junit-jupiter:5.7.0' + implementation 'org.junit.jupiter:junit-jupiter:5.7.0' + implementation 'org.hamcrest:hamcrest-library:2.2' // -- -- testImplementation('org.springframework.boot:spring-boot-starter-test') { @@ -88,10 +104,24 @@ task buildPythonSdk(type: GenerateTask){ inputSpec = "${openapiSourceFile}".toString() outputDir = "${openapiClientTargetDir}".toString() configOptions = [ - "packageVersion" : "1.1.2", + 
"packageVersion" : "9.15.2021", ] } +task integrationTest(type: Test) { + description = 'Integration tests.' + group = 'verification' + + testClassesDirs = sourceSets.integrationTest.output.classesDirs + classpath = sourceSets.integrationTest.runtimeClasspath + + outputs.upToDateWhen { false } // other way: inputs.upToDateWhen { false } + + mustRunAfter test +} + +check.dependsOn integrationTest + openApiValidate { inputSpec = "${openapiSourceFile}".toString() } diff --git a/src/integrationTest/java/bio/terra/cda/app/model/QueryResult.java b/src/integrationTest/java/bio/terra/cda/app/model/QueryResult.java new file mode 100644 index 00000000..3268ff28 --- /dev/null +++ b/src/integrationTest/java/bio/terra/cda/app/model/QueryResult.java @@ -0,0 +1,63 @@ +package bio.terra.cda.app.model; + +import java.util.List; + +public class QueryResult { + private String query_id; + private String query_sql; + private String message; + private String statusCode; + private List causes; + + public String getQuery_id() { + return query_id; + } + + public void setQuery_id(String query_id) { + this.query_id = query_id; + } + + public String getQuery_sql() { + return query_sql; + } + + public void setQuery_sql(String query_sql) { + this.query_sql = query_sql; + } + + public String getMessage() { + return message; + } + + public void setMessage(String message) { + this.message = message; + } + + public String getStatusCode() { + return statusCode; + } + + public void setStatusCode(String statusCode) { + this.statusCode = statusCode; + } + + public List getCauses() { + return causes; + } + + public void setCauses(List causes) { + this.causes = causes; + } + + @Override + public String toString() { + return "QueryResult{" + + "query_id='" + + query_id + + '\'' + + ", query_sql='" + + query_sql + + '\'' + + '}'; + } +} diff --git a/src/integrationTest/java/bio/terra/cda/app/model/SchemaObject.java b/src/integrationTest/java/bio/terra/cda/app/model/SchemaObject.java new file mode 100644 
index 00000000..ec597a69 --- /dev/null +++ b/src/integrationTest/java/bio/terra/cda/app/model/SchemaObject.java @@ -0,0 +1,94 @@ +package bio.terra.cda.app.model; + +public class SchemaObject { + private String table_catalog; + private String table_schema; + private String table_name; + private String column_name; + private String field_path; + private String data_type; + private String description; + + public String getTable_catalog() { + return table_catalog; + } + + public void setTable_catalog(String table_catalog) { + this.table_catalog = table_catalog; + } + + public String getTable_schema() { + return table_schema; + } + + public void setTable_schema(String table_schema) { + this.table_schema = table_schema; + } + + public String getTable_name() { + return table_name; + } + + public void setTable_name(String table_name) { + this.table_name = table_name; + } + + public String getColumn_name() { + return column_name; + } + + public void setColumn_name(String column_name) { + this.column_name = column_name; + } + + public String getField_path() { + return field_path; + } + + public void setField_path(String field_path) { + this.field_path = field_path; + } + + public String getData_type() { + return data_type; + } + + public void setData_type(String data_type) { + this.data_type = data_type; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + @Override + public String toString() { + return "SchemaObject{" + + "table_catalog='" + + table_catalog + + '\'' + + ", table_schema='" + + table_schema + + '\'' + + ", table_name='" + + table_name + + '\'' + + ", column_name='" + + column_name + + '\'' + + ", field_path='" + + field_path + + '\'' + + ", data_type='" + + data_type + + '\'' + + ", description='" + + description + + '\'' + + '}'; + } +} diff --git a/src/integrationTest/java/bio/terra/cda/app/model/SchemaObjectList.java 
b/src/integrationTest/java/bio/terra/cda/app/model/SchemaObjectList.java new file mode 100644 index 00000000..7b43831b --- /dev/null +++ b/src/integrationTest/java/bio/terra/cda/app/model/SchemaObjectList.java @@ -0,0 +1,58 @@ +package bio.terra.cda.app.model; + +import java.util.List; + +public class SchemaObjectList { + private List result; + private String query_sql; + private int total_row_count; + private String next_url; + + public List getResult() { + return result; + } + + public void setResult(List result) { + this.result = result; + } + + public String getQuery_sql() { + return query_sql; + } + + public void setQuery_sql(String query_sql) { + this.query_sql = query_sql; + } + + public int getTotal_row_count() { + return total_row_count; + } + + public void setTotal_row_count(int total_row_count) { + this.total_row_count = total_row_count; + } + + public String getNext_url() { + return next_url; + } + + public void setNext_url(String next_url) { + this.next_url = next_url; + } + + @Override + public String toString() { + return "SchemaObjectList{" + + "result=" + + result + + ", query_sql='" + + query_sql + + '\'' + + ", total_row_count=" + + total_row_count + + ", next_url='" + + next_url + + '\'' + + '}'; + } +} diff --git a/src/integrationTest/java/bio/terra/cda/app/model/StatusMessage.java b/src/integrationTest/java/bio/terra/cda/app/model/StatusMessage.java new file mode 100644 index 00000000..115e51f5 --- /dev/null +++ b/src/integrationTest/java/bio/terra/cda/app/model/StatusMessage.java @@ -0,0 +1,3 @@ +package bio.terra.cda.app.model; + +public class StatusMessage {} diff --git a/src/integrationTest/java/bio/terra/cda/app/service/EndToEndIntegrationTests.java b/src/integrationTest/java/bio/terra/cda/app/service/EndToEndIntegrationTests.java new file mode 100644 index 00000000..980a35fc --- /dev/null +++ b/src/integrationTest/java/bio/terra/cda/app/service/EndToEndIntegrationTests.java @@ -0,0 +1,240 @@ +package bio.terra.cda.app.service; + +import 
static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; + +import bio.terra.cda.app.model.QueryResult; +import bio.terra.cda.app.model.SchemaObjectList; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.InputStream; +import java.util.Map; +import org.junit.jupiter.api.Test; + +public class EndToEndIntegrationTests { + + @Test + public void testApiStatusIT() throws Exception { + + /* curl -X GET "https://cda.cda-dev.broadinstitute.org/status" -H "accept: application/json" */ + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "GET", + "-H", + "accept: application/json", + "https://cda.cda-dev.broadinstitute.org/status"); + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + + ObjectMapper mapper = new ObjectMapper(); + Map result = mapper.readValue(inputStream, Map.class); + assertThat(result.toString(), containsString("everything is fine")); + } catch (Exception e) { + System.out.println(e.getMessage()); + } + } + + /** curl -X POST "https://cda.cda-dev.broadinstitute.org/api/v1/unique-values/v3? 
*/ + // @Disabled + @Test + public void testUniqueValuesApiIT() throws Exception { + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "POST", + "-H", + "Content-Type: text/plain", + "-H", + "accept: application/json", + "https://cda.cda-dev.broadinstitute.org/api/v1/unique-values/v3?tablename=gdc-bq-sample.cda_mvp", + "-d", + "sex"); + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + + ObjectMapper mapper = new ObjectMapper(); + QueryResult qr = mapper.readValue(inputStream, QueryResult.class); + System.out.println(qr.getQuery_id()); + + String result = retrieveQueryIdResults(qr.getQuery_id()); + assertThat(result, containsString("female")); + + } catch (Exception e) { + System.out.println(e.getMessage()); + } + } + + /** + * Test the SQL-QUERY API with formatted SQL. curl -X POST + * https://cda.cda-dev.broadinstitute.org/api/v1/sql-query + */ + @Test + public void testSqlQueryApiIT() throws Exception { + String sql = + "SELECT * FROM `gdc-bq-sample.cda_mvp.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` WHERE table_name = 'v3' Limit 1"; + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "POST", + "-H", + "Content-Type: text/plain", + "-H", + "accept: application/json", + "https://cda.cda-dev.broadinstitute.org/api/v1/sql-query", + "-d", + sql); + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + + ObjectMapper mapper = new ObjectMapper(); + QueryResult qr = mapper.readValue(inputStream, QueryResult.class); + + SchemaObjectList results = retrieveQueryMapResults(qr.getQuery_id()); + + assertThat(results.getResult().get(0).getColumn_name(), containsString("days_to_birth")); + + } catch (Exception e) { + System.out.println(e.getMessage()); + } + } + + /** + * Test the Boolean API with formatted json. 
curl -X POST + * https://cda.cda-dev.broadinstitute.org/api/v1/boolean-query + */ + @Test + public void testBooleanQueryApiIT() throws Exception { + String booleanQuery = + "{\"node_type\":\"AND\",\"l\":{\"node_type\":\"AND\",\"l\":{\"node_type\":\">\",\"l\":{\"node_type\":\"column\",\"value\":\"ResearchSubject.Diagnosis.age_at_diagnosis\"},\"r\":{\"node_type\":\"unquoted\",\"value\":\"50 * 365\"}},\"r\":{\"node_type\":\"=\",\"l\":{\"node_type\":\"column\",\"value\":\"ResearchSubject.Specimen.associated_project\"},\"r\":{\"node_type\":\"quoted\",\"value\":\"TCGA-ESCA\"}}},\"r\":{\"node_type\":\"=\",\"l\":{\"node_type\":\"column\",\"value\":\"ResearchSubject.Diagnosis.tumor_stage\"},\"r\":{\"node_type\":\"quoted\",\"value\":\"stage iiic\"}}}"; + + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "POST", + "-H", + "Content-Type: text/plain", + "-H", + "accept: application/json", + "https://cda.cda-dev.broadinstitute.org/api/v1/boolean-query/v3?table=gdc-bq-sample.cda_mvp", + "-d", + booleanQuery); + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + System.out.println("command: " + build.command()); + + ObjectMapper mapper = new ObjectMapper(); + QueryResult qr = mapper.readValue(inputStream, QueryResult.class); + + System.out.println(qr.getQuery_id()); + + JsonNode jsonMap = retrieveQueryJsonResults(qr.getQuery_id()); + + System.out.println(jsonMap.get(0).toString()); + } catch (Exception e) { + System.out.println(e.getMessage()); + } + } + + /** + * This method retrieves the ResultSet from the Database using the query_id curl -X GET + * "https://cda.cda-dev.broadinstitute.org/api/v1/query/ + */ + private String retrieveQueryIdResults(String query_id) { + Map qr = null; + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "GET", + "-H", + "accept: application/json", + 
"https://cda.cda-dev.broadinstitute.org/api/v1/query/" + query_id + "?limit=100"); + + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + + ObjectMapper mapper = new ObjectMapper(); + qr = mapper.readValue(inputStream, Map.class); + System.out.println("command: " + build.command()); + } catch (Exception e) { + System.out.println(e.getMessage()); + } + return qr.toString(); + } + + /** + * This method retrieves the ResultSet from the Database using the query_id curl -X GET + * "https://cda.cda-dev.broadinstitute.org/api/v1/query/ + */ + private SchemaObjectList retrieveQueryMapResults(String query_id) { + SchemaObjectList results = null; + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "GET", + "-H", + "accept: application/json", + "https://cda.cda-dev.broadinstitute.org/api/v1/query/" + query_id + "?limit=100"); + + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + + ObjectMapper mapper = new ObjectMapper(); + results = mapper.readValue(inputStream, SchemaObjectList.class); + System.out.println("command: " + build.command()); + } catch (Exception e) { + System.out.println(e.getMessage()); + } + return results; + } + + /** + * This method retrieves the ResultSet from the Database using the query_id curl -X GET + * "https://cda.cda-dev.broadinstitute.org/api/v1/query/ + */ + private JsonNode retrieveQueryJsonResults(String query_id) { + JsonNode jsonMap = null; + JsonNode results = null; + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "GET", + "-H", + "accept: application/json", + "https://cda.cda-dev.broadinstitute.org/api/v1/query/" + query_id + "?limit=100"); + + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + + ObjectMapper mapper = new ObjectMapper(); + jsonMap = mapper.readTree(inputStream); + results = jsonMap.get("result"); + 
System.out.println("command: " + build.command()); + + } catch (Exception e) { + System.out.println(e.getMessage()); + } + return results; + } +} diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index 66e879de..cfa8b0b0 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -248,7 +248,7 @@ components: name: limit schema: type: integer - default: 1000 + default: 100 description: The numbers of entries to return per page of data QueryId: in: path diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 9ea0ee6a..5a47e9ac 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,3 +1,3 @@ server.port=8080 cda.bqTable=gdc-bq-sample.integration -cda.datasetVersion=all_v2 \ No newline at end of file +cda.datasetVersion=all_v2 diff --git a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java index 2a0fc006..87ac58e7 100644 --- a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java +++ b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java @@ -66,6 +66,7 @@ public void uniqueValuesTest() throws Exception { String system = "GDC"; String body = "sex"; String table = "TABLE"; + var expected = "SELECT DISTINCT sex FROM TABLE.v3, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE _identifier.system = 'GDC'"; var result = From b7bd7097ae669e9fe28d9266b5c6a5b162fe4188 Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Mon, 20 Dec 2021 12:12:02 -0500 Subject: [PATCH 08/18] Log4j2 fix (#134) * log4jSecurityFix * upgrade SpringBoot v2.4.3 to v2.5.4 --- build.gradle | 13 ++++++++++--- .../app/controller/GlobalExceptionHandlerTest.java | 13 
++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/build.gradle b/build.gradle index addcfe7c..551e3cd4 100644 --- a/build.gradle +++ b/build.gradle @@ -9,7 +9,7 @@ buildscript { plugins { id 'java' id 'idea' - id 'org.springframework.boot' version '2.4.3' + id 'org.springframework.boot' version '2.5.4' id 'io.spring.dependency-management' version '1.0.11.RELEASE' id 'com.google.cloud.tools.jib' version '2.8.0' id 'org.openapi.generator' version '5.1.1' @@ -18,11 +18,13 @@ plugins { id 'jacoco' } +ext['log4j2.version'] = '2.17.0' + group = 'bio.terra.cda' version = '2.0.4-SNAPSHOT' -sourceCompatibility = JavaVersion.VERSION_11 +//sourceCompatibility = JavaVersion.VERSION_11 repositories { mavenCentral() @@ -51,7 +53,7 @@ dependencies { // These dependencies are required to load the logback config file. implementation group: 'org.codehaus.groovy', name: 'groovy', version: '3.0.7' - implementation group: 'org.springframework.cloud', name: 'spring-cloud-gcp-starter-logging', version: '1.2.5.RELEASE' + implementation group: 'org.springframework.cloud', name: 'spring-cloud-gcp-starter-logging', version: '1.2.8.RELEASE' // -- OpenAPI CodeGen dependencies -- implementation group: 'io.swagger.core.v3', name: 'swagger-annotations', version: '2.1.7' @@ -71,6 +73,11 @@ dependencies { // Exclude the Spring logger, so everything will use SLF4J configurations.all { exclude group: "commons-logging", module: "commons-logging" + resolutionStrategy.eachDependency { DependencyResolveDetails details -> + if (details.requested.group == 'org.apache.logging.log4j') { + details.useVersion '2.17.0' + } + } } // OpenAPI Server Generation diff --git a/src/test/java/bio/terra/cda/app/controller/GlobalExceptionHandlerTest.java b/src/test/java/bio/terra/cda/app/controller/GlobalExceptionHandlerTest.java index 959b989c..cd0d7719 100644 --- a/src/test/java/bio/terra/cda/app/controller/GlobalExceptionHandlerTest.java +++ 
b/src/test/java/bio/terra/cda/app/controller/GlobalExceptionHandlerTest.java @@ -1,17 +1,16 @@ package bio.terra.cda.app.controller; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + import bio.terra.cda.common.exception.BadRequestException; import bio.terra.cda.common.exception.ErrorReportException; import bio.terra.cda.generated.model.ErrorReport; +import java.util.List; import org.junit.jupiter.api.Test; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; - class GlobalExceptionHandlerTest { @Test @@ -27,7 +26,7 @@ public List getCauses() { GlobalExceptionHandler handler = new GlobalExceptionHandler(); ResponseEntity report = handler.errorReportHandler(erx); - assertEquals (HttpStatus.BAD_REQUEST, report.getStatusCode()); + assertEquals(HttpStatus.BAD_REQUEST, report.getStatusCode()); } @Test @@ -50,6 +49,6 @@ public List getCauses() { GlobalExceptionHandler handler = new GlobalExceptionHandler(); ResponseEntity report = handler.validationExceptionHandler(erx); - assertEquals (HttpStatus.BAD_REQUEST, report.getStatusCode()); + assertEquals(HttpStatus.BAD_REQUEST, report.getStatusCode()); } } From 1a2f26905608f3d0a3249cf6c3efe23ed5c7fffb Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Mon, 20 Dec 2021 12:16:56 -0500 Subject: [PATCH 09/18] Update build.gradle bump version number --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 551e3cd4..12ad20f5 100644 --- a/build.gradle +++ b/build.gradle @@ -22,7 +22,7 @@ ext['log4j2.version'] = '2.17.0' group = 'bio.terra.cda' -version = '2.0.4-SNAPSHOT' +version = '2.0.5-SNAPSHOT' //sourceCompatibility = JavaVersion.VERSION_11 From 
c44b6b0436aa877944403503f7c564c8638ab534 Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Mon, 20 Dec 2021 12:47:35 -0500 Subject: [PATCH 10/18] Update build.gradle --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 12ad20f5..2ca32590 100644 --- a/build.gradle +++ b/build.gradle @@ -22,7 +22,7 @@ ext['log4j2.version'] = '2.17.0' group = 'bio.terra.cda' -version = '2.0.5-SNAPSHOT' +version = '2.0.6-SNAPSHOT' //sourceCompatibility = JavaVersion.VERSION_11 From 4d4265ea3f68490880e5f8ab1bd427df10bd1056 Mon Sep 17 00:00:00 2001 From: Dion Boles <82461519+dionboles-asym@users.noreply.github.com> Date: Thu, 17 Mar 2022 12:42:47 -0400 Subject: [PATCH 11/18] Pre Release (#155) * log4jSecurityFix (#133) * log4jSecurityFix * upgrade SpringBoot v2.4.3 to v2.5.4 * fix java version error (#135) * Added Global Counts * Adding IN keyword * T cell integration (#136) * fix java version error * updated log4j and staging endpoint * Integrate tCell using jib jvmArgs * Update README.md * Update README.md * Update README.md * update tcell location (#137) * fix code bug from sonarqube (#138) * update exception bug (#140) * Added Uppecase inside of the QueryTranslator and adding the building blocks for counts * Case insensitive query and counts endpoint called globelcounts works like boolean query. 
* Updated unit test * null check in global Exception handler * Updated endpoint v2.1.0 (#142) * Tagged version 2.1.0 * bump api version * fixing error with Int64 conversion * DDO-1914 Fixes to Github Actions Image Build and Deploy Workflow (#145) * fix build and deploy github action * formatting * fixes to GHA build and push workflow * no-op to trigger test workflow * only run on merge and disable dispatch to terra-dev for now * Add AppSec Trivy scanner (#141) Co-authored-by: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> * Performance timer tests it (#143) * PerformanceTests * in progress * Performance Tests working * Project property bugfix (#144) * in progress * in progress * fixed properties bug * add classpath to config * Added additional profiles * format changes * application dev update * profile updates for dsde-dev * in progress * fix swagger dropdown * add bootRunDev * Log timer aspect (#147) * Method execution timer aspect * TimerAspect * Execution Timer annotation * cleanup code * enable automated deploys to terra-dev environment (#148) * updating Translator with int check and add a bigquerySchema function * add tomcat Timeout * Fixed failing tests * update checks * Add files endpoint and refactor some code * added count rich update to the code * Changes to TableSchema * Remove some methods * add all_v2_1 schema and update gdc_subjects schema * Add select to query * Adding Like support * New view to remove files, some refactoring * Creating destination tables * get counts from files table, refactored some counts code * added schema for v3 and new view * adding like support and testing sql cache using a map * add check for schema in sendQuery and fix some unit tests * fixed error with tableSchema to tableSchemaMap * adding record support * trying to bubble up a type error in the sql generator * bubble up some errors * operators split into separate classes: initial * More operator changes * couple of fixes and fixed unit tests * 
changing checks * updating python_client * Couple of sonarqube fixes * Remove stack traces * sonarqube changes * More sonarqube fixes * adding docker Co-authored-by: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Co-authored-by: mflinn-broad <60187023+mflinn-broad@users.noreply.github.com> Co-authored-by: Denis Loginov Co-authored-by: Frederick Kaufman Co-authored-by: rfricke-asymmetrik --- .github/workflows/master_push.yml | 77 +-- .github/workflows/trivy.yaml | 35 ++ .gitignore | 3 + README.md | 6 + build.gradle | 58 +- docker-compose.yml | 12 + dockerfile | 29 + .../java/bio/terra/cda/app/model/File.java | 140 +++++ .../bio/terra/cda/app/model/Identifier.java | 27 + .../bio/terra/cda/app/model/SchemaObject.java | 18 + .../terra/cda/app/model/SchemaObjectList.java | 6 +- .../app/service/EndToEndIntegrationTests.java | 2 +- .../service/PerformanceIntegrationTests.java | 172 ++++++ src/main/java/bio/terra/cda/app/Main.java | 16 + .../bio/terra/cda/app/aop/TimerAspect.java | 25 + .../terra/cda/app/aop/TrackExecutionTime.java | 10 + .../ApplicationConfiguration.java | 37 +- .../controller/GlobalExceptionHandler.java | 10 +- .../cda/app/controller/MetaApiController.java | 2 + .../app/controller/QueryApiController.java | 139 ++++- .../app/generators/CountsSqlGenerator.java | 135 +++++ .../cda/app/generators/FileSqlGenerator.java | 11 + .../cda/app/generators/SqlGenerator.java | 75 +++ .../cda/app/operators/BasicOperator.java | 25 + .../bio/terra/cda/app/operators/Column.java | 46 ++ .../java/bio/terra/cda/app/operators/In.java | 22 + .../bio/terra/cda/app/operators/Like.java | 16 + .../java/bio/terra/cda/app/operators/Not.java | 15 + .../app/operators/OperatorDeserializer.java | 81 +++ .../terra/cda/app/operators/QueryModule.java | 12 + .../cda/app/operators/QueryOperator.java | 13 + .../bio/terra/cda/app/operators/Quoted.java | 28 + .../bio/terra/cda/app/operators/Select.java | 14 + .../terra/cda/app/operators/SelectValues.java | 25 + 
.../app/operators/SingleSidedOperator.java | 14 + .../bio/terra/cda/app/operators/Unquoted.java | 22 + .../terra/cda/app/service/QueryService.java | 75 ++- .../terra/cda/app/util/QueryTranslator.java | 97 --- .../java/bio/terra/cda/app/util/SqlUtil.java | 27 + .../bio/terra/cda/app/util/TableSchema.java | 172 ++++++ src/main/resources/api/service_openapi.yaml | 174 ++++-- src/main/resources/application-dev.properties | 7 + .../resources/application-local.properties | 8 + .../resources/application-prod.properties | 8 + src/main/resources/application.properties | 8 +- src/main/resources/schema/GDC_Files.json | 474 +++++++++++++++ src/main/resources/schema/GDC_Subjects.json | 392 ++++++++++++ .../schema/GDC_Subjects_NoFiles.json | 315 ++++++++++ src/main/resources/schema/all_v2_1.json | 567 ++++++++++++++++++ src/main/resources/schema/all_v3_0_Files.json | 474 +++++++++++++++ .../schema/all_v3_0_subjects_meta.json | 315 ++++++++++ .../resources/schema/gdc_pdc_idc_v2_1.json | 567 ++++++++++++++++++ .../controller/QueryApiControllerTest.java | 13 +- .../cda/app/generators/SqlGeneratorTest.java | 80 +++ .../cda/app/service/QueryServiceTest.java | 4 + .../terra/cda/app/util/NestedColumnTest.java | 4 +- .../cda/app/util/QueryTranslatorTest.java | 118 ---- src/test/resources/query/query-ambiguous.json | 4 +- src/test/resources/query/query-not.json | 2 +- src/test/resources/query/query1.json | 4 +- src/test/resources/query/query2.json | 10 +- src/test/resources/query/query3.json | 4 +- src/test/resources/schema/TABLE.json | 77 +++ .../schema/all_v3_0_subjects_meta.json | 315 ++++++++++ 64 files changed, 5290 insertions(+), 403 deletions(-) create mode 100644 .github/workflows/trivy.yaml create mode 100644 docker-compose.yml create mode 100644 dockerfile create mode 100644 src/integrationTest/java/bio/terra/cda/app/model/File.java create mode 100644 src/integrationTest/java/bio/terra/cda/app/model/Identifier.java create mode 100644 
src/integrationTest/java/bio/terra/cda/app/service/PerformanceIntegrationTests.java create mode 100644 src/main/java/bio/terra/cda/app/aop/TimerAspect.java create mode 100644 src/main/java/bio/terra/cda/app/aop/TrackExecutionTime.java create mode 100644 src/main/java/bio/terra/cda/app/generators/CountsSqlGenerator.java create mode 100644 src/main/java/bio/terra/cda/app/generators/FileSqlGenerator.java create mode 100644 src/main/java/bio/terra/cda/app/generators/SqlGenerator.java create mode 100644 src/main/java/bio/terra/cda/app/operators/BasicOperator.java create mode 100644 src/main/java/bio/terra/cda/app/operators/Column.java create mode 100644 src/main/java/bio/terra/cda/app/operators/In.java create mode 100644 src/main/java/bio/terra/cda/app/operators/Like.java create mode 100644 src/main/java/bio/terra/cda/app/operators/Not.java create mode 100644 src/main/java/bio/terra/cda/app/operators/OperatorDeserializer.java create mode 100644 src/main/java/bio/terra/cda/app/operators/QueryModule.java create mode 100644 src/main/java/bio/terra/cda/app/operators/QueryOperator.java create mode 100644 src/main/java/bio/terra/cda/app/operators/Quoted.java create mode 100644 src/main/java/bio/terra/cda/app/operators/Select.java create mode 100644 src/main/java/bio/terra/cda/app/operators/SelectValues.java create mode 100644 src/main/java/bio/terra/cda/app/operators/SingleSidedOperator.java create mode 100644 src/main/java/bio/terra/cda/app/operators/Unquoted.java delete mode 100644 src/main/java/bio/terra/cda/app/util/QueryTranslator.java create mode 100644 src/main/java/bio/terra/cda/app/util/SqlUtil.java create mode 100644 src/main/java/bio/terra/cda/app/util/TableSchema.java create mode 100644 src/main/resources/application-dev.properties create mode 100644 src/main/resources/application-local.properties create mode 100644 src/main/resources/application-prod.properties create mode 100644 src/main/resources/schema/GDC_Files.json create mode 100644 
src/main/resources/schema/GDC_Subjects.json create mode 100644 src/main/resources/schema/GDC_Subjects_NoFiles.json create mode 100644 src/main/resources/schema/all_v2_1.json create mode 100644 src/main/resources/schema/all_v3_0_Files.json create mode 100644 src/main/resources/schema/all_v3_0_subjects_meta.json create mode 100644 src/main/resources/schema/gdc_pdc_idc_v2_1.json create mode 100644 src/test/java/bio/terra/cda/app/generators/SqlGeneratorTest.java delete mode 100644 src/test/java/bio/terra/cda/app/util/QueryTranslatorTest.java create mode 100644 src/test/resources/schema/TABLE.json create mode 100644 src/test/resources/schema/all_v3_0_subjects_meta.json diff --git a/.github/workflows/master_push.yml b/.github/workflows/master_push.yml index ba595658..55ea230b 100644 --- a/.github/workflows/master_push.yml +++ b/.github/workflows/master_push.yml @@ -7,12 +7,10 @@ on: paths-ignore: - 'README.md' - '.github/**' + env: - SERVICE_NAME: ${{ github.event.repository.name }} - GOOGLE_PROJECT: terra-kernel-k8s - GKE_CLUSTER: terra-kernel-k8s - VAULT_PATH_GCR: secret/dsde/terra/kernel/test - VAULT_ADDR: https://clotho.broadinstitute.org:8200 + SERVICE_NAME: cancerdata + GOOGLE_PROJECT: broad-dsp-gcr-public jobs: tag-build-push: runs-on: ubuntu-latest @@ -21,61 +19,40 @@ jobs: uses: actions/checkout@master - name: Bump version and push tag id: tag - uses: broadinstitute/github-tag-action@master + uses: databiosphere/github-actions/actions/bumper@bumper-0.0.4 env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - WITH_V: false - - name: Pull Vault image - run: docker pull vault:1.1.0 - # Currently, there's no way to add capabilities to Docker actions on Git, and Vault needs IPC_LOCK to run. 
- - name: Get Vault token - id: vault-token-step - run: | - VAULT_TOKEN=$(docker run --rm --cap-add IPC_LOCK \ - -e "VAULT_ADDR=${VAULT_ADDR}" \ - vault:1.1.0 \ - vault write -field token \ - auth/approle/login role_id=${{ secrets.VAULT_APPROLE_ROLE_ID }} \ - secret_id=${{ secrets.VAULT_APPROLE_SECRET_ID }}) - echo ::set-output name=vault-token::$VAULT_TOKEN - echo ::add-mask::$VAULT_TOKEN - - name: Get Vault secrets - id: vault-secret-step - run: | - GCR_EMAIL=$(docker run --rm --cap-add IPC_LOCK \ - -e "VAULT_TOKEN=${{ steps.vault-token-step.outputs.vault-token }}" \ - -e "VAULT_ADDR=${VAULT_ADDR}" \ - vault:1.1.0 \ - vault read -field ci-gcr-sa-email ${VAULT_PATH_GCR}) - GCR_KEY=$(docker run --rm --cap-add IPC_LOCK \ - -e "VAULT_TOKEN=${{ steps.vault-token-step.outputs.vault-token }}" \ - -e "VAULT_ADDR=${VAULT_ADDR}" \ - vault:1.1.0 \ - vault read -field ci-gcr-sa-key ${VAULT_PATH_GCR}) - echo ::set-output name=gcr-email::$GCR_EMAIL - echo ::add-mask::$GCR_EMAIL - echo ::set-output name=gcr-key::$GCR_KEY - echo ::add-mask::$GCR_KEY - - name: Auth to GCR - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master + DEFAULT_BUMP: patch + GITHUB_TOKEN: ${{ secrets.BROADBOT_TOKEN }} + RELEASE_BRANCHES: master + WITH_V: true + + - name: Setup gcloud + uses: google-github-actions/setup-gcloud@v0.3.0 with: - version: '270.0.0' - service_account_email: ${{ steps.vault-secret-step.outputs.gcr-email }} - service_account_key: ${{ steps.vault-secret-step.outputs.gcr-key }} + service_account_key: ${{ secrets.GCR_PUBLISH_KEY_B64 }} + - name: Explicitly auth Docker for GCR run: gcloud auth configure-docker --quiet + - name: Set up JDK 1.8 - uses: actions/setup-java@v1 + uses: actions/setup-java@v2 with: - java-version: 1.8 + java-version: '11' + distribution: 'temurin' + - name: Grant execute permission for gradlew run: chmod +x gradlew + - name: Build and push GCR image using Jib run: "./gradlew jib --image=gcr.io/${GOOGLE_PROJECT}/${SERVICE_NAME}:${{ 
steps.tag.outputs.tag }}" - - name: Update Version Mapping + env: + SERVICE_VERSION: ${{ steps.tag.outputs.tag }} + + - name: Deploy to Terra Dev environment + if: github.event_name == 'push' uses: broadinstitute/repository-dispatch@master with: - token: ${{ secrets.REPO_ACCESS_TOKEN }} + token: ${{ secrets.BROADBOT_TOKEN }} repository: broadinstitute/terra-helmfile - event-type: version-bump - client-payload: '{"service": "poc", "version": "${{ steps.tag.outputs.tag }}"}' + event-type: update-service + client-payload: '{"service": "cancerdata", "version": "${{ steps.tag.outputs.tag }}"}' diff --git a/.github/workflows/trivy.yaml b/.github/workflows/trivy.yaml new file mode 100644 index 00000000..6838fbd2 --- /dev/null +++ b/.github/workflows/trivy.yaml @@ -0,0 +1,35 @@ +name: dsp-appsec-trivy +on: [pull_request] + +jobs: + appsec-trivy: + # Parse Dockerfile and build, scan image if a "blessed" base image is not used + name: DSP AppSec Trivy check + runs-on: ubuntu-latest + env: + # only used to name the image for this scan; it is not pushed anywhere + image: databiosphere/terra-external-credentials-manager + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v2 + with: + java-version: '11' + distribution: 'temurin' + + - name: Gradle cache + uses: actions/cache@v2 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: gradle- + + - name: Build image locally with Jib + run: | + ./gradlew jibDockerBuild --image="${image}" + + - uses: broadinstitute/dsp-appsec-trivy-action@v1 + with: + image: ${{ env.image }} diff --git a/.gitignore b/.gitignore index 6ff2292e..ecf552d9 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,9 @@ local-dev/ *.ipr out/ +### tCell agent ### +tcell* + ### NetBeans ### /nbproject/private/ /nbbuild/ diff --git a/README.md b/README.md index 17bc0f88..be98a04f 100644 --- a/README.md +++ b/README.md @@ -1,6 
+1,12 @@ + # cda-service This repository started as a clone of the [kernel-service-poc](https://github.com/DataBiosphere/kernel-service-poc) project. +## Sonarqube Static Code Analysis +Clicking on the following image will take you to the CDA Sonarqube code analysis dashboard. +
+[![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=CancerDataAggregator_cda-service&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=CancerDataAggregator_cda-service) + ## Getting Started (macOS) Building and running locally requires JDK 11 and gradle. On a Mac, you can use [brew](https://brew.sh/) diff --git a/build.gradle b/build.gradle index cc2dd65d..814bcd9f 100644 --- a/build.gradle +++ b/build.gradle @@ -4,6 +4,9 @@ buildscript { repositories { mavenCentral() } + dependencies { + classpath("org.springframework.boot:spring-boot-gradle-plugin:2.4.3.RELEASE") + } } plugins { @@ -18,10 +21,11 @@ plugins { id 'jacoco' } -ext['log4j2.version'] = '2.17.0' +ext['log4j2.version'] = '2.17.1' group = 'bio.terra.cda' -version = '0.0.22.5-SNAPSHOT' + +version = '2.1.0' sourceCompatibility = JavaVersion.VERSION_11 repositories { @@ -45,7 +49,7 @@ dependencies { implementation group: 'org.springframework.boot', name: 'spring-boot-starter-validation' implementation group: 'javax.validation', name: 'validation-api' implementation group: 'org.webjars', name: 'swagger-ui', version: '3.24.0' - + implementation group: 'org.aspectj', name: 'aspectjweaver', version: '1.8.8' implementation group: 'com.google.cloud', name: 'google-cloud-bigquery', version: '1.124.7' // These dependencies are required to load the logback config file. 
@@ -72,7 +76,7 @@ configurations.all { exclude group: "commons-logging", module: "commons-logging" resolutionStrategy.eachDependency { DependencyResolveDetails details -> if (details.requested.group == 'org.apache.logging.log4j') { - details.useVersion '2.17.0' + details.useVersion '2.17.1' } } } @@ -107,13 +111,25 @@ openApiGenerate { ] } +task GetSchema(type: Exec){ + commandLine "bq show --schema --format=prettyjson gdc-bq-sample:integration.all_v2_1 > schema.json" +} + +task GetFilesSchema(type: Exec){ + commandLine "bq show --schema --format=prettyjson gdc-bq-sample:dev.all_v3_0_Files > all_v3_0_Files.json" +} + +task GetSubjectViewSchema(type: Exec){ + commandLine "bq show --schema --format=prettyjson gdc-bq-sample:dev.all_v3_0_subjects_meta > all_v3_0_subjects_meta.json" +} + task buildPythonSdk(type: GenerateTask){ generatorName = "python" packageName = "cda_client" inputSpec = "${openapiSourceFile}".toString() outputDir = "${openapiClientTargetDir}".toString() configOptions = [ - "packageVersion" : "9.15.2021", + "packageVersion" : "3.15.2022", ] } @@ -129,6 +145,38 @@ task integrationTest(type: Test) { mustRunAfter test } +jib { + from { + image = 'us.gcr.io/broad-dsp-gcr-public/base/jre:11-distroless' + } + container { + environment = [ 'spring.profiles.active':'dev' ] + + } +} + +tasks.register("bootRunLocal") { + group = "application" + description = "Runs the Spring Boot application with the local profile" + doFirst { + tasks.bootRun.configure { + systemProperty("spring.profiles.active", "local") + } + } + finalizedBy("bootRun") +} + +tasks.register("bootRunDev") { + group = "application" + description = "Runs the Spring Boot application with the dev profile" + doFirst { + tasks.bootRun.configure { + systemProperty("spring.profiles.active", "dev") + } + } + finalizedBy("bootRun") +} + check.dependsOn integrationTest openApiValidate { diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..c7eb3fae --- /dev/null +++ 
b/docker-compose.yml @@ -0,0 +1,12 @@ +version: '2' +services: + boot-web: + build: ./ + image: spring-boot-gradle-docker + restart: always + ports: + - 8080:8080 + logging: + options: + max-size: "10m" + max-file: "10" \ No newline at end of file diff --git a/dockerfile b/dockerfile new file mode 100644 index 00000000..c1a48b38 --- /dev/null +++ b/dockerfile @@ -0,0 +1,29 @@ +FROM gradle:6.9-jdk11-alpine AS TEMP_BUILD_IMAGE +ENV APP_HOME=/usr/app/ +WORKDIR $APP_HOME +COPY . . +RUN set -eux; \ + ./gradlew build +# COPY build.gradle settings.gradle $APP_HOME +# +# COPY gradle $APP_HOME/gradle +# COPY --chown=gradle:gradle . /home/gradle/src +# USER root +# RUN chown -R gradle /home/gradle/src +# +# RUN gradle build || return 0 +# COPY . . +# RUN gradle clean build + + +# actual container + +FROM adoptopenjdk/openjdk11:alpine-jre AS FINAL +ENV APP_HOME=/usr/app/ + +WORKDIR $APP_HOME +COPY --from=TEMP_BUILD_IMAGE $APP_HOME/build/libs/cda-service-2.1.0.jar . + +EXPOSE 8080 +CMD ["java" , "-jar","cda-service-2.1.0.jar"] + diff --git a/src/integrationTest/java/bio/terra/cda/app/model/File.java b/src/integrationTest/java/bio/terra/cda/app/model/File.java new file mode 100644 index 00000000..7f6aaa0e --- /dev/null +++ b/src/integrationTest/java/bio/terra/cda/app/model/File.java @@ -0,0 +1,140 @@ +package bio.terra.cda.app.model; + +public class File { + private String id; + private Identifier identifier; + private String label; + private String data_category; + private String data_type; + private String associated_project; + private String drs_uri; + private long byte_size; + private String data_modality; + private String imaging_modality; + private String dbgap_accession_number; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Identifier getIdentifier() { + return identifier; + } + + public void setIdentifier(Identifier identifier) { + this.identifier = identifier; + } + + public String getLabel() { + return 
label; + } + + public void setLabel(String label) { + this.label = label; + } + + public String getData_category() { + return data_category; + } + + public void setData_category(String data_category) { + this.data_category = data_category; + } + + public String getData_type() { + return data_type; + } + + public void setData_type(String data_type) { + this.data_type = data_type; + } + + public String getAssociated_project() { + return associated_project; + } + + public void setAssociated_project(String associated_project) { + this.associated_project = associated_project; + } + + public String getDrs_uri() { + return drs_uri; + } + + public void setDrs_uri(String drs_uri) { + this.drs_uri = drs_uri; + } + + public long getByte_size() { + return byte_size; + } + + public void setByte_size(long byte_size) { + this.byte_size = byte_size; + } + + public String getData_modality() { + return data_modality; + } + + public void setData_modality(String data_modality) { + this.data_modality = data_modality; + } + + public String getImaging_modality() { + return imaging_modality; + } + + public void setImaging_modality(String imaging_modality) { + this.imaging_modality = imaging_modality; + } + + public String getDbgap_accession_number() { + return dbgap_accession_number; + } + + public void setDbgap_accession_number(String dbgap_accession_number) { + this.dbgap_accession_number = dbgap_accession_number; + } + + @Override + public String toString() { + return "File{" + + "id='" + + id + + '\'' + + ", identifier=" + + identifier + + ", label='" + + label + + '\'' + + ", data_category='" + + data_category + + '\'' + + ", data_type='" + + data_type + + '\'' + + ", associated_project='" + + associated_project + + '\'' + + ", drs_uri='" + + drs_uri + + '\'' + + ", byte_size=" + + byte_size + + ", data_modality='" + + data_modality + + '\'' + + ", imaging_modality='" + + imaging_modality + + '\'' + + ", dbgap_accession_number='" + + dbgap_accession_number + + '\'' + + '}'; + } +} 
diff --git a/src/integrationTest/java/bio/terra/cda/app/model/Identifier.java b/src/integrationTest/java/bio/terra/cda/app/model/Identifier.java new file mode 100644 index 00000000..7707f775 --- /dev/null +++ b/src/integrationTest/java/bio/terra/cda/app/model/Identifier.java @@ -0,0 +1,27 @@ +package bio.terra.cda.app.model; + +public class Identifier { + private String system; + private String value; + + public String getSystem() { + return system; + } + + public void setSystem(String system) { + this.system = system; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } + + @Override + public String toString() { + return "Identifier{" + "system='" + system + '\'' + ", value='" + value + '\'' + '}'; + } +} diff --git a/src/integrationTest/java/bio/terra/cda/app/model/SchemaObject.java b/src/integrationTest/java/bio/terra/cda/app/model/SchemaObject.java index ec597a69..cb0b6ac1 100644 --- a/src/integrationTest/java/bio/terra/cda/app/model/SchemaObject.java +++ b/src/integrationTest/java/bio/terra/cda/app/model/SchemaObject.java @@ -1,6 +1,8 @@ package bio.terra.cda.app.model; public class SchemaObject { + private String id; + private Identifier identifier; private String table_catalog; private String table_schema; private String table_name; @@ -65,6 +67,22 @@ public void setDescription(String description) { this.description = description; } + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Identifier getIdentifier() { + return identifier; + } + + public void setIdentifier(Identifier identifier) { + this.identifier = identifier; + } + @Override public String toString() { return "SchemaObject{" diff --git a/src/integrationTest/java/bio/terra/cda/app/model/SchemaObjectList.java b/src/integrationTest/java/bio/terra/cda/app/model/SchemaObjectList.java index 7b43831b..76d90a86 100644 --- 
a/src/integrationTest/java/bio/terra/cda/app/model/SchemaObjectList.java +++ b/src/integrationTest/java/bio/terra/cda/app/model/SchemaObjectList.java @@ -3,16 +3,16 @@ import java.util.List; public class SchemaObjectList { - private List result; + private List result; private String query_sql; private int total_row_count; private String next_url; - public List getResult() { + public List getResult() { return result; } - public void setResult(List result) { + public void setResult(List result) { this.result = result; } diff --git a/src/integrationTest/java/bio/terra/cda/app/service/EndToEndIntegrationTests.java b/src/integrationTest/java/bio/terra/cda/app/service/EndToEndIntegrationTests.java index 980a35fc..5a74430f 100644 --- a/src/integrationTest/java/bio/terra/cda/app/service/EndToEndIntegrationTests.java +++ b/src/integrationTest/java/bio/terra/cda/app/service/EndToEndIntegrationTests.java @@ -101,7 +101,7 @@ public void testSqlQueryApiIT() throws Exception { SchemaObjectList results = retrieveQueryMapResults(qr.getQuery_id()); - assertThat(results.getResult().get(0).getColumn_name(), containsString("days_to_birth")); + assertThat(results.getResult().get(0).toString(), containsString("days_to_birth")); } catch (Exception e) { System.out.println(e.getMessage()); diff --git a/src/integrationTest/java/bio/terra/cda/app/service/PerformanceIntegrationTests.java b/src/integrationTest/java/bio/terra/cda/app/service/PerformanceIntegrationTests.java new file mode 100644 index 00000000..07e523f5 --- /dev/null +++ b/src/integrationTest/java/bio/terra/cda/app/service/PerformanceIntegrationTests.java @@ -0,0 +1,172 @@ +package bio.terra.cda.app.service; + +import bio.terra.cda.app.model.QueryResult; +import bio.terra.cda.app.model.SchemaObjectList; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.InputStream; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import 
org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class PerformanceIntegrationTests { + private List queryList; + private long totalExecutionTime = 0; + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); + Date date = new Date(); + + private final String sql1 = + "SELECT * FROM `gdc-bq-sample.cda_mvp.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` " + + "WHERE table_name = 'v3'"; + + private final String sql2 = + "SELECT all_v2_1.* FROM gdc-bq-sample.integration.all_v2_1 AS all_v2_1 " + + "WHERE (all_v2_1.id = 'TCGA-E2-A10A')"; + private final String sql3 = + "SELECT all_v2_1.* FROM (SELECT all_v2_1.* FROM (SELECT all_v2_1.* FROM gdc-bq-sample.integration.all_v2_1 AS all_v2_1, " + + "UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier " + + "WHERE (_identifier.system = 'GDC')) AS all_v2_1, UNNEST(ResearchSubject) AS _ResearchSubject, " + + "UNNEST(_ResearchSubject.identifier) AS _identifier " + + "WHERE (_identifier.system = 'PDC')) AS all_v2_1, UNNEST(identifier) AS _identifier " + + "WHERE (_identifier.system = 'IDC')"; + private final String sql4 = + "SELECT COUNT(all_v2_1) FROM" + + " gdc-bq-sample.integration.all_v2_1 AS all_v2_1\n" + + "WHERE (all_v2_1.id = 'TCGA-E2-A10A')"; + private final String sql5 = + "SELECT(SUM((SELECT COUNT(system) FROM UNNEST(identifier) WHERE system = 'GDC'))) AS GDC," + + "(SUM((SELECT COUNT(system) FROM UNNEST(identifier) WHERE system = 'PDC'))) AS PDC," + + "(SUM((SELECT COUNT(system) FROM UNNEST(identifier) WHERE system = 'IDC'))) " + + "AS IDC From (SELECT identifier FROM gdc-bq-sample.integration.all_v2_1), UNNEST(identifier)"; + private final String sql6 = + "SELECT all_v2_1.* FROM gdc-bq-sample.integration.all_v2_1 AS all_v2_1, " + + "UNNEST(ResearchSubject) AS _ResearchSubject, " + + "UNNEST(_ResearchSubject.Specimen) AS _Specimen, " + + 
"UNNEST(_ResearchSubject.Diagnosis) AS _Diagnosis, " + + "UNNEST(_Specimen.identifier) AS _identifier " + + "WHERE ((_Specimen.primary_disease_type = 'Nevi and Melanomas') " + + "AND ((_Diagnosis.age_at_diagnosis < 30*365) AND (_identifier.system = 'GDC')))"; + private final String sql7 = + "SELECT DISTINCT _Specimen.associated_project FROM integration.all_v2_1, " + + "UNNEST(ResearchSubject) AS _ResearchSubject, " + + "UNNEST(_ResearchSubject.Specimen) " + + "AS _Specimen ORDER BY _Specimen.associated_project"; + private final String sql8 = + "SELECT all_v2_1.* FROM gdc-bq-sample.integration.all_v2_1 AS all_v2_1, " + + "UNNEST(ResearchSubject) AS _ResearchSubject, " + + "UNNEST(_ResearchSubject.Diagnosis) AS _Diagnosis " + + "WHERE ((_Diagnosis.age_at_diagnosis > 50*365) " + + "AND (_ResearchSubject.member_of_research_project = 'TCGA-OV'))"; + private final String sql9 = + "SELECT DISTINCT vital_status FROM integration.all_v2_1 ORDER BY vital_status"; + private final String sql10 = + "SELECT all_v2_1.* FROM gdc-bq-sample.integration.all_v2_1 AS all_v2_1, " + + "UNNEST(ResearchSubject) AS _ResearchSubject " + + "WHERE ((all_v2_1.sex = 'female') " + + "AND ((_ResearchSubject.primary_diagnosis_condition = 'Breast Invasive Carcinoma') " + + "AND ((all_v2_1.days_to_birth <= -30*365) AND (all_v2_1.days_to_birth >= -45*365))))"; + + @BeforeAll + public void init() { + queryList = new ArrayList(); + queryList.add(sql1); + queryList.add(sql2); + queryList.add(sql3); + queryList.add(sql4); + queryList.add(sql5); + queryList.add(sql6); + queryList.add(sql7); + queryList.add(sql8); + queryList.add(sql9); + queryList.add(sql10); + } + + @Test + public void runPerformanceTests() throws Exception { + for (String query : queryList) { + long startTime = System.currentTimeMillis(); + runSql(query); + long endTime = System.currentTimeMillis(); + long totalTime = endTime - startTime; + System.out.print("Query: " + query); + System.out.println("\nExecution time in milliseconds : " + 
totalTime + "ms\n"); + totalExecutionTime += totalTime; + } + System.out.println( + "\nPerformance Report for " + + dateFormat.format(date) + + " Optimal performance is <= 80secs"); + System.out.println( + "*** Total Execution for 10 sequential queries in seconds: " + + totalExecutionTime / 1000 + + "secs"); + } + + /** + * Test the SQL-QUERY API with formatted SQL. curl -X POST + * https://cda.cda-dev.broadinstitute.org/api/v1/sql-query + */ + public void runSql(String sql) { + + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "POST", + "-H", + "Content-Type: text/plain", + "-H", + "accept: application/json", + "https://cda.cda-dev.broadinstitute.org/api/v1/sql-query", + "-d", + sql); + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + + ObjectMapper mapper = new ObjectMapper(); + QueryResult qr = mapper.readValue(inputStream, QueryResult.class); + System.out.println(qr.toString()); + retrieveQueryMapResults(qr.getQuery_id()); + + } catch (Exception e) { + System.out.println(e.getMessage()); + } + } + + /** + * This method retrieves the ResultSet from the Database using the query_id curl -X GET + * "https://cda.cda-dev.broadinstitute.org/api/v1/query/ + */ + private SchemaObjectList retrieveQueryMapResults(String query_id) { + SchemaObjectList results = null; + try { + // create the process + ProcessBuilder build = + new ProcessBuilder( + "/usr/bin/curl", + "-X", + "GET", + "-H", + "accept: application/json", + "https://cda.cda-dev.broadinstitute.org/api/v1/query/" + query_id + "?limit=100"); + + Process process = build.start(); + InputStream inputStream = process.getInputStream(); + + ObjectMapper mapper = new ObjectMapper(); + results = mapper.readValue(inputStream, SchemaObjectList.class); + System.out.println("command: " + build.command()); + System.out.println("RESULTS Row Length: " + results.getResult().size()); + System.out.println("Results Total Row Count: " + 
results.getTotal_row_count()); + } catch (Exception e) { + System.out.println(e.getMessage()); + } + return results; + } +} diff --git a/src/main/java/bio/terra/cda/app/Main.java b/src/main/java/bio/terra/cda/app/Main.java index a8aa6013..20138584 100644 --- a/src/main/java/bio/terra/cda/app/Main.java +++ b/src/main/java/bio/terra/cda/app/Main.java @@ -1,18 +1,34 @@ package bio.terra.cda.app; +import bio.terra.cda.app.configuration.ApplicationConfiguration; +import org.springframework.beans.factory.SmartInitializingSingleton; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.cache.annotation.EnableCaching; +import org.springframework.context.ApplicationContext; +import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; import org.springframework.scheduling.annotation.EnableScheduling; @SpringBootApplication(exclude = {DataSourceAutoConfiguration.class}) @ComponentScan(basePackages = "bio.terra.cda") +@EnableConfigurationProperties(ApplicationConfiguration.class) @EnableScheduling @EnableCaching public class Main { public static void main(String[] args) { SpringApplication.run(Main.class, args); } + + // This is a "magic bean": It supplies a method that Spring calls after the application is setup, + // but before the port is opened for business. That lets us do database migration and stairway + // initialization on a system that is otherwise fully configured. The rule of thumb is that all + // bean initialization should avoid database access. If there is additional database work to be + // done, it should happen inside this method. 
+ @Bean + public SmartInitializingSingleton postSetupInitialization(ApplicationContext applicationContext) { + return () -> StartupInitializer.initialize(applicationContext); + } } diff --git a/src/main/java/bio/terra/cda/app/aop/TimerAspect.java b/src/main/java/bio/terra/cda/app/aop/TimerAspect.java new file mode 100644 index 00000000..a9a95a20 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/aop/TimerAspect.java @@ -0,0 +1,25 @@ +package bio.terra.cda.app.aop; + +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; + +@Aspect +@Component +public class TimerAspect { + + private static final Logger logger = LoggerFactory.getLogger(TimerAspect.class); + + @Around("@annotation(bio.terra.cda.app.aop.TrackExecutionTime)") + public Object executionTimer(ProceedingJoinPoint joinPoint) throws Throwable { + long start = System.currentTimeMillis(); + Object proceed = joinPoint.proceed(); + long executionTime = System.currentTimeMillis() - start; + logger.info( + "--Execution Timer: " + joinPoint.getSignature() + " executed in " + executionTime + "ms"); + return proceed; + } +} diff --git a/src/main/java/bio/terra/cda/app/aop/TrackExecutionTime.java b/src/main/java/bio/terra/cda/app/aop/TrackExecutionTime.java new file mode 100644 index 00000000..bf9899e3 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/aop/TrackExecutionTime.java @@ -0,0 +1,10 @@ +package bio.terra.cda.app.aop; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +public @interface TrackExecutionTime {} diff --git a/src/main/java/bio/terra/cda/app/configuration/ApplicationConfiguration.java 
b/src/main/java/bio/terra/cda/app/configuration/ApplicationConfiguration.java index f9c3e238..0a2364bc 100644 --- a/src/main/java/bio/terra/cda/app/configuration/ApplicationConfiguration.java +++ b/src/main/java/bio/terra/cda/app/configuration/ApplicationConfiguration.java @@ -1,31 +1,34 @@ package bio.terra.cda.app.configuration; -import bio.terra.cda.app.StartupInitializer; +import bio.terra.cda.app.operators.QueryModule; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import com.fasterxml.jackson.module.paramnames.ParameterNamesModule; -import org.springframework.beans.factory.SmartInitializingSingleton; +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.BigQueryOptions; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.context.properties.ConfigurationProperties; -import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.cache.CacheManager; import org.springframework.cache.concurrent.ConcurrentMapCacheManager; -import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.EnableTransactionManagement; +@Component @Configuration -@EnableConfigurationProperties @EnableTransactionManagement @ConfigurationProperties(prefix = "cda") public class ApplicationConfiguration { // Configurable properties - private String bqTable; private String datasetVersion; + @Value("${project:default}") + private String project; + public String getBqTable() { return bqTable; } @@ -42,6 +45,14 @@ public void setDatasetVersion(String datasetVersion) { this.datasetVersion = datasetVersion; } + public String getProject() { + return project; + } + + 
public void setProject(String project) { + this.project = project; + } + @Bean public CacheManager cacheManager() { return new ConcurrentMapCacheManager("system-status"); @@ -52,16 +63,12 @@ public ObjectMapper objectMapper() { return new ObjectMapper() .registerModule(new ParameterNamesModule()) .registerModule(new Jdk8Module()) - .registerModule(new JavaTimeModule()); + .registerModule(new JavaTimeModule()) + .registerModule(new QueryModule()); } - // This is a "magic bean": It supplies a method that Spring calls after the application is setup, - // but before the port is opened for business. That lets us do database migration and stairway - // initialization on a system that is otherwise fully configured. The rule of thumb is that all - // bean initialization should avoid database access. If there is additional database work to be - // done, it should happen inside this method. - @Bean - public SmartInitializingSingleton postSetupInitialization(ApplicationContext applicationContext) { - return () -> StartupInitializer.initialize(applicationContext); + @Bean("bigQuery") + public BigQuery bigQuery() { + return BigQueryOptions.newBuilder().setProjectId(project).build().getService(); } } diff --git a/src/main/java/bio/terra/cda/app/controller/GlobalExceptionHandler.java b/src/main/java/bio/terra/cda/app/controller/GlobalExceptionHandler.java index b6a5d589..eee8a814 100644 --- a/src/main/java/bio/terra/cda/app/controller/GlobalExceptionHandler.java +++ b/src/main/java/bio/terra/cda/app/controller/GlobalExceptionHandler.java @@ -40,7 +40,8 @@ public ResponseEntity validationExceptionHandler(Exception ex) { return buildErrorReport(ex, HttpStatus.BAD_REQUEST, null); } - // -- catchall - log so we can understand what we have missed in the handlers above + // -- catchall - log so we can understand what we have missed in the handlers + // above @ExceptionHandler(Exception.class) public ResponseEntity catchallHandler(Exception ex) { logger.error("Exception caught by 
catchall hander", ex); @@ -60,8 +61,11 @@ private ResponseEntity buildErrorReport( if (causes == null) { causes = collectCauses; } - ErrorReport errorReport = - new ErrorReport().message(ex.getMessage()).statusCode(statusCode.value()).causes(causes); + ErrorReport errorReport = null; + if (ex != null) { + errorReport = + new ErrorReport().message(ex.getMessage()).statusCode(statusCode.value()).causes(causes); + } return new ResponseEntity<>(errorReport, statusCode); } } diff --git a/src/main/java/bio/terra/cda/app/controller/MetaApiController.java b/src/main/java/bio/terra/cda/app/controller/MetaApiController.java index c873bcb5..40806966 100644 --- a/src/main/java/bio/terra/cda/app/controller/MetaApiController.java +++ b/src/main/java/bio/terra/cda/app/controller/MetaApiController.java @@ -1,5 +1,6 @@ package bio.terra.cda.app.controller; +import bio.terra.cda.app.aop.TrackExecutionTime; import bio.terra.cda.app.configuration.ApplicationConfiguration; import bio.terra.cda.app.service.QueryService; import bio.terra.cda.generated.controller.MetaApi; @@ -28,6 +29,7 @@ public MetaApiController(ApplicationConfiguration applicationConfiguration) { this.applicationConfiguration = applicationConfiguration; } + @TrackExecutionTime @Override public ResponseEntity serviceStatus() { return ResponseEntity.ok(queryService.bigQueryCheck()); diff --git a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java index c58aa193..3a0834b9 100644 --- a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java +++ b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java @@ -1,11 +1,21 @@ package bio.terra.cda.app.controller; +import bio.terra.cda.app.aop.TrackExecutionTime; import bio.terra.cda.app.configuration.ApplicationConfiguration; +import bio.terra.cda.app.generators.CountsSqlGenerator; +import bio.terra.cda.app.generators.FileSqlGenerator; +import 
bio.terra.cda.app.generators.SqlGenerator; import bio.terra.cda.app.service.QueryService; +import bio.terra.cda.app.service.exception.BadQueryException; import bio.terra.cda.app.util.NestedColumn; -import bio.terra.cda.app.util.QueryTranslator; +import bio.terra.cda.app.util.TableSchema; import bio.terra.cda.generated.controller.QueryApi; -import bio.terra.cda.generated.model.*; +import bio.terra.cda.generated.model.JobStatusData; +import bio.terra.cda.generated.model.Query; +import bio.terra.cda.generated.model.QueryCreatedData; +import bio.terra.cda.generated.model.QueryResponseData; +import com.google.cloud.bigquery.BigQueryException; +import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Collections; @@ -22,6 +32,7 @@ @Controller public class QueryApiController implements QueryApi { private static final Logger logger = LoggerFactory.getLogger(QueryApiController.class); + private static final String INVALID_DATABASE = "Unable to find schema for that version."; private final QueryService queryService; private final ApplicationConfiguration applicationConfiguration; @@ -49,20 +60,24 @@ private String createNextUrl(String jobId, int offset, int limit) { } } + @TrackExecutionTime @Override public ResponseEntity query(String id, Integer offset, Integer limit) { var result = queryService.getQueryResults(id, offset, limit); - var response = new QueryResponseData() - .result(Collections.unmodifiableList(result.items)) - .totalRowCount(result.totalRowCount) - .querySql(result.querySql); + var response = + new QueryResponseData() + .result(Collections.unmodifiableList(result.items)) + .totalRowCount(result.totalRowCount) + .querySql(result.querySql); int nextPage = result.items.size() + limit; if (result.totalRowCount == null || nextPage <= result.totalRowCount) { response.nextUrl(createNextUrl(id, nextPage, limit)); } + return ResponseEntity.ok(response); } + @TrackExecutionTime @Override public ResponseEntity 
jobStatus(String id) { var response = queryService.getQueryStatusFromJob(id); @@ -72,31 +87,75 @@ public ResponseEntity jobStatus(String id) { private ResponseEntity sendQuery(String querySql, boolean dryRun) { var response = new QueryCreatedData().querySql(querySql); + if (!querySql.contains(applicationConfiguration.getProject())) { + throw new IllegalArgumentException("Your database is outside of the project"); + } + var lowerCaseQuery = querySql.toLowerCase(); + + try { + var supportedSchemas = TableSchema.supportedSchemas(); + var found = false; + + for (String schema : supportedSchemas) { + if (lowerCaseQuery.contains(schema)) { + found = true; + break; + } + } + + if (!found) { + throw new IllegalArgumentException(INVALID_DATABASE); + } + } catch (Exception e) { + throw new IllegalArgumentException(INVALID_DATABASE); + } + + if (lowerCaseQuery.contains("create table") + || lowerCaseQuery.contains("delete from") + || lowerCaseQuery.contains("drop table") + || lowerCaseQuery.contains("update") + || lowerCaseQuery.contains("alter table")) { + throw new IllegalArgumentException("Those actions are not available in sql"); + } if (!dryRun) { - response.queryId(queryService.startQuery(querySql)); + try { + response.queryId(queryService.startQuery(querySql)); + } catch (BigQueryException e) { + throw new BadQueryException("Could not create job"); + } } return new ResponseEntity<>(response, HttpStatus.OK); } + @TrackExecutionTime @Override - public ResponseEntity bulkData(String version) { - String querySql = "SELECT * FROM " + applicationConfiguration.getBqTable() + "." + version; + public ResponseEntity bulkData(String version, String table) { + String querySql = "SELECT * FROM " + table + "." 
+ version; return sendQuery(querySql, false); } + @TrackExecutionTime @Override public ResponseEntity sqlQuery(String querySql) { + return sendQuery(querySql, false); } + @TrackExecutionTime @Override public ResponseEntity booleanQuery( String version, @Valid Query body, @Valid Boolean dryRun, @Valid String table) { - String querySql = QueryTranslator.sql(table + "." + version, body); - - return sendQuery(querySql, dryRun); + try { + String querySql = new SqlGenerator(table + "." + version, body, version).generate(); + return sendQuery(querySql, dryRun); + } catch (IOException e) { + throw new IllegalArgumentException(INVALID_DATABASE); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(e.getMessage()); + } } + @TrackExecutionTime @Override public ResponseEntity uniqueValues( String version, String body, String system, String table) { @@ -121,15 +180,24 @@ public ResponseEntity uniqueValues( } else { whereClause = ""; } - StringBuffer unnestConcat = new StringBuffer(); - unnestClauses.stream().forEach((k) -> unnestConcat.append(k)); - - String querySql = "SELECT DISTINCT " + nt.getColumn() + " FROM " + tableName + unnestConcat + whereClause; + StringBuilder unnestConcat = new StringBuilder(); + unnestClauses.forEach(unnestConcat::append); + + String querySql = + "SELECT DISTINCT " + + nt.getColumn() + + " FROM " + + tableName + + unnestConcat + + whereClause + + " ORDER BY " + + nt.getColumn(); logger.debug("uniqueValues: " + querySql); return sendQuery(querySql, false); } + @TrackExecutionTime @Override public ResponseEntity columns(String version, String table) { String tableName; @@ -138,13 +206,42 @@ public ResponseEntity columns(String version, String table) { } else { tableName = table; } - String querySql = "SELECT field_path FROM " - + tableName - + ".INFORMATION_SCHEMA.COLUMN_FIELD_PATHS WHERE table_name = '" - + version - + "'"; + String querySql = + "SELECT field_path FROM " + + tableName + + 
".INFORMATION_SCHEMA.COLUMN_FIELD_PATHS WHERE table_name = '" + + version + + "'"; logger.debug("columns: " + querySql); return sendQuery(querySql, false); } + + @Override + public ResponseEntity globalCounts( + String version, @Valid Query body, @Valid Boolean dryRun, @Valid String table) { + try { + String querySql = new CountsSqlGenerator(table + "." + version, body, version).generate(); + return sendQuery(querySql, dryRun); + } catch (IOException e) { + throw new IllegalArgumentException(INVALID_DATABASE); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(e.getMessage()); + } + } + + @TrackExecutionTime + @Override + public ResponseEntity files( + String version, @Valid Query body, @Valid Boolean dryRun, @Valid String table) { + String querySql = ""; + try { + querySql = new FileSqlGenerator(table + "." + version, body, version).generate(); + } catch (IOException e) { + throw new IllegalArgumentException(INVALID_DATABASE); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(e.getMessage()); + } + return sendQuery(querySql, dryRun); + } } diff --git a/src/main/java/bio/terra/cda/app/generators/CountsSqlGenerator.java b/src/main/java/bio/terra/cda/app/generators/CountsSqlGenerator.java new file mode 100644 index 00000000..463dd674 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/generators/CountsSqlGenerator.java @@ -0,0 +1,135 @@ +package bio.terra.cda.app.generators; + +import bio.terra.cda.app.operators.BasicOperator; +import bio.terra.cda.app.util.SqlUtil; +import bio.terra.cda.generated.model.Query; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import java.util.LinkedList; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class CountsSqlGenerator extends SqlGenerator { + public CountsSqlGenerator(String qualifiedTable, Query rootQuery, String version) + throws IOException { + 
super(qualifiedTable, rootQuery, version); + } + + @Override + protected String sql(String tableOrSubClause, Query query) + throws UncheckedExecutionException, IllegalArgumentException { + if (query.getNodeType() == Query.NodeTypeEnum.SUBQUERY) { + // A SUBQUERY is built differently from other queries. The FROM clause is the + // SQL version of + // the right subtree, instead of using table. The left subtree is now the top + // level query. + return sql(String.format("(%s)", sql(tableOrSubClause, query.getR())), query.getL()); + } + Supplier> fromClause = + () -> { + try { + return Stream.concat( + Stream.of(tableOrSubClause + " AS " + table), + ((BasicOperator) query).getUnnestColumns(table, tableSchemaMap).distinct()); + } catch (Exception e) { + throw new UncheckedExecutionException(e); + } + }; + String condition = ((BasicOperator) query).queryString(table, tableSchemaMap); + + var whereClause = + condition != null && condition.length() > 0 + ? String.format("WHERE\n" + " %s\n", condition) + : ""; + + var countArrays = + tableSchemaMap.keySet().stream() + .filter( + field -> { + var splitField = field.split("\\."); + if (splitField.length < 2) { + return false; + } + + return splitField[1].equals("identifier"); + }) + .map( + field -> { + var splitField = field.split("\\."); + return String.join(".", splitField[0], splitField[1]); + }) + .distinct(); + + var selects = new LinkedList(); + var queries = new LinkedList(); + + countArrays.forEach( + field -> { + var splitField = field.split("\\."); + var alias = String.format("%s_count", splitField[0].toLowerCase()); + var filesAlias = String.format("%s_files_count", splitField[0].toLowerCase()); + selects.add(getSelectField(alias)); + selects.add(getSelectField(filesAlias)); + // Entity count + queries.add(getSubQuery(fromClause, whereClause, alias, field, field)); + // File count + queries.add(getSubQuery(fromClause, whereClause, filesAlias, field, "identifier")); + }); + + return String.format("SELECT\n" + " 
identifiers.system,\n" + "%s", String.join(",\n ", selects)) + + String.format( + " FROM (\n" + + " SELECT DISTINCT _Identifier.system\n" + + " FROM %1$s AS %2$s,\n" + + " UNNEST(identifier) AS _Identifier\n" + + ") as identifiers \n" + + "%3$s", + tableOrSubClause, table, String.join(" \n ", queries)); + } + + private String getSubQuery( + Supplier> currentUnnests, + String whereClause, + String alias, + String groupByField, + String countByField) { + var from = + Stream.concat( + currentUnnests.get(), + Stream.concat( + SqlUtil.getUnnestsFromParts(table, groupByField.split("\\."), true), + SqlUtil.getUnnestsFromParts(table, countByField.split("\\."), true))) + .distinct() + .collect(Collectors.joining(",\n")); + + var groupBySplit = groupByField.split("\\."); + var countBySplit = countByField.split("\\."); + + return String.format( + " LEFT OUTER JOIN (\n" + + " SELECT\n" + + " %1$s.system,\n" + + " COUNT(DISTINCT %2$s.value) AS count\n" + + " FROM\n" + + " %3$s\n" + + " %4$s" + + " GROUP BY\n" + + " %1$s.system\n" + + " ) AS %5$s ON %5$s.system = identifiers.system\n", + SqlUtil.getAlias(groupBySplit.length - 1, groupBySplit), + SqlUtil.getAlias(countBySplit.length - 1, countBySplit), + from, + whereClause, + alias); + } + + private String getSelectField(String alias) { + return String.format( + " CASE\n" + + " WHEN %1$s.count IS NULL THEN 0\n" + + " ELSE %1$s.count\n" + + " END AS %1$s\n", + alias); + } +} diff --git a/src/main/java/bio/terra/cda/app/generators/FileSqlGenerator.java b/src/main/java/bio/terra/cda/app/generators/FileSqlGenerator.java new file mode 100644 index 00000000..68a54523 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/generators/FileSqlGenerator.java @@ -0,0 +1,11 @@ +package bio.terra.cda.app.generators; + +import bio.terra.cda.generated.model.Query; +import java.io.IOException; + +public class FileSqlGenerator extends SqlGenerator { + public FileSqlGenerator(String qualifiedTable, Query rootQuery, String version) + throws 
IOException { + super(qualifiedTable, rootQuery, version); + } +} diff --git a/src/main/java/bio/terra/cda/app/generators/SqlGenerator.java b/src/main/java/bio/terra/cda/app/generators/SqlGenerator.java new file mode 100644 index 00000000..b4a105b7 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/generators/SqlGenerator.java @@ -0,0 +1,75 @@ +package bio.terra.cda.app.generators; + +import bio.terra.cda.app.operators.BasicOperator; +import bio.terra.cda.app.util.SqlUtil; +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class SqlGenerator { + final String qualifiedTable; + final Query rootQuery; + final String table; + final Map tableSchemaMap; + final List tableSchema; + + public SqlGenerator(String qualifiedTable, Query rootQuery, String version) throws IOException { + this.qualifiedTable = qualifiedTable; + this.rootQuery = rootQuery; + int dotPos = qualifiedTable.lastIndexOf('.'); + this.table = dotPos == -1 ? qualifiedTable : qualifiedTable.substring(dotPos + 1); + this.tableSchema = TableSchema.getSchema(version); + this.tableSchemaMap = TableSchema.buildSchemaMap(this.tableSchema); + } + + public String generate() throws IllegalArgumentException { + return sql(qualifiedTable, rootQuery); + } + + protected String sql(String tableOrSubClause, Query query) throws IllegalArgumentException { + if (query.getNodeType() == Query.NodeTypeEnum.SUBQUERY) { + // A SUBQUERY is built differently from other queries. The FROM clause is the + // SQL version of + // the right subtree, instead of using table. The left subtree is now the top + // level query. 
+ return sql(String.format("(%s)", sql(tableOrSubClause, query.getR())), query.getL()); + } + + var fromClause = + Stream.concat( + Stream.of(tableOrSubClause + " AS " + table), + ((BasicOperator) query).getUnnestColumns(table, tableSchemaMap).distinct()) + .collect(Collectors.joining(", ")); + + String condition = ((BasicOperator) query).queryString(table, tableSchemaMap); + + return String.format("%s FROM %s WHERE %s", getSelect(query, table), fromClause, condition); + } + + protected String getSelect(Query query, String table) { + if (query.getNodeType() == Query.NodeTypeEnum.SELECT) { + return String.format("SELECT %s", queryToSelect(query).collect(Collectors.joining(","))); + } else { + return String.format("SELECT %s.*", table); + } + } + + protected Stream queryToSelect(Query query) { + return Arrays.stream(query.getL().getValue().split(",")) + .map( + select -> { + var parts = + Arrays.stream(select.split("\\.")).map(String::trim).toArray(String[]::new); + return String.format( + "%s.%s AS %s", + parts.length == 1 ? 
table : SqlUtil.getAlias(parts.length - 2, parts), + parts[parts.length - 1], + String.join("_", parts)); + }); + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/BasicOperator.java b/src/main/java/bio/terra/cda/app/operators/BasicOperator.java new file mode 100644 index 00000000..a9037d08 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/BasicOperator.java @@ -0,0 +1,25 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Map; +import java.util.stream.Stream; + +public class BasicOperator extends Query { + public Stream getUnnestColumns( + String table, Map tableSchemaMap) + throws IllegalArgumentException { + return Stream.concat( + ((BasicOperator) getL()).getUnnestColumns(table, tableSchemaMap), + ((BasicOperator) getR()).getUnnestColumns(table, tableSchemaMap)); + } + + public String queryString(String table, Map tableSchemaMap) + throws IllegalArgumentException { + return String.format( + "(%s %s %s)", + ((BasicOperator) getL()).queryString(table, tableSchemaMap), + this.getNodeType(), + ((BasicOperator) getR()).queryString(table, tableSchemaMap)); + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/Column.java b/src/main/java/bio/terra/cda/app/operators/Column.java new file mode 100644 index 00000000..c1da8cfb --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/Column.java @@ -0,0 +1,46 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.SqlUtil; +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Map; +import java.util.stream.Stream; + +@QueryOperator(nodeType = Query.NodeTypeEnum.COLUMN) +public class Column extends BasicOperator { + @Override + public Stream getUnnestColumns( + String table, Map tableSchemaMap) { + try { + var tmp = tableSchemaMap.get(getValue()); + var tmpGetMode = tmp.getMode(); + var parts = getValue().split("\\."); + 
return SqlUtil.getUnnestsFromParts(table, parts, (tmpGetMode.equals("REPEATED"))); + } catch (NullPointerException e) { + throw new IllegalArgumentException( + String.format("Column %s does not exist on table %s", getValue(), table)); + } + } + + @Override + public String queryString( + String table, Map tableSchemaMap) { + var tmp = tableSchemaMap.get(getValue()); + var tmpGetMode = tmp.getMode(); + var tmpGetType = tmp.getType(); + var value = getValue(); + var parts = value.split("\\."); + var columnText = ""; + if (tmpGetMode.equals("REPEATED")) { + columnText = String.format("%s", SqlUtil.getAlias(parts.length - 1, parts)); + } else if (parts.length == 1) { + columnText = String.format("%s.%s", table, value); + } else { + columnText = + String.format( + "%s.%s", SqlUtil.getAlias(parts.length - 2, parts), parts[parts.length - 1]); + } + + return tmpGetType.equals("STRING") ? String.format("UPPER(%s)", columnText) : columnText; + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/In.java b/src/main/java/bio/terra/cda/app/operators/In.java new file mode 100644 index 00000000..7caeabe8 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/In.java @@ -0,0 +1,22 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Map; + +@QueryOperator(nodeType = Query.NodeTypeEnum.IN) +public class In extends SingleSidedOperator { + @Override + public String queryString(String table, Map tableSchemaMap) + throws IllegalArgumentException { + String right = ((BasicOperator) getR()).queryString(table, tableSchemaMap); + if (right.contains("[") || right.contains("(")) { + right = right.substring(1, right.length() - 1).replace("\"", "'"); + } else { + throw new IllegalArgumentException("To use IN you need to add [ or ("); + } + + String left = ((BasicOperator) getL()).queryString(table, tableSchemaMap); + return String.format("(%s IN (%s))", left, right); + } +} diff --git 
a/src/main/java/bio/terra/cda/app/operators/Like.java b/src/main/java/bio/terra/cda/app/operators/Like.java new file mode 100644 index 00000000..055781bd --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/Like.java @@ -0,0 +1,16 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Map; + +@QueryOperator(nodeType = Query.NodeTypeEnum.LIKE) +public class Like extends SingleSidedOperator { + @Override + public String queryString(String table, Map tableSchemaMap) + throws IllegalArgumentException { + String rightValue = getR().getValue(); + String leftValue = ((BasicOperator) getL()).queryString(table, tableSchemaMap); + return String.format("%s LIKE UPPER(%s)", leftValue, rightValue); + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/Not.java b/src/main/java/bio/terra/cda/app/operators/Not.java new file mode 100644 index 00000000..4d048f95 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/Not.java @@ -0,0 +1,15 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Map; + +@QueryOperator(nodeType = Query.NodeTypeEnum.NOT) +public class Not extends SingleSidedOperator { + @Override + public String queryString(String table, Map tableSchemaMap) + throws IllegalArgumentException { + return String.format( + "(%s %s)", getNodeType(), ((BasicOperator) getL()).queryString(table, tableSchemaMap)); + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/OperatorDeserializer.java b/src/main/java/bio/terra/cda/app/operators/OperatorDeserializer.java new file mode 100644 index 00000000..fc20c329 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/OperatorDeserializer.java @@ -0,0 +1,81 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.generated.model.Query; +import com.fasterxml.jackson.core.JsonParser; +import 
com.fasterxml.jackson.core.ObjectCodec; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.util.Objects; +import org.springframework.context.annotation.ClassPathScanningCandidateComponentProvider; +import org.springframework.core.type.filter.AnnotationTypeFilter; + +public class OperatorDeserializer extends JsonDeserializer { + @Override + public Query deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + // Get reference to ObjectCodec + ObjectCodec codec = p.getCodec(); + + // Parse "object" node into Jackson's tree model + JsonNode node = codec.readTree(p); + + if (node.isNull()) { + return null; + } + + var nodeType = node.get("node_type"); + + Query.NodeTypeEnum type = Query.NodeTypeEnum.fromValue(nodeType.textValue()); + + ClassPathScanningCandidateComponentProvider scanner = + new ClassPathScanningCandidateComponentProvider(false); + + scanner.addIncludeFilter(new AnnotationTypeFilter(QueryOperator.class)); + + var clazz = + scanner.findCandidateComponents("bio.terra.cda.app.operators").stream() + .map( + cls -> { + try { + return Class.forName(cls.getBeanClassName()); + } catch (ClassNotFoundException e) { + return null; + } + }) + .filter(Objects::nonNull) + .filter( + cls -> { + QueryOperator operator = cls.getAnnotation(QueryOperator.class); + return operator.nodeType().equals(type); + }) + .findFirst(); + + Query query; + + if (clazz.isPresent()) { + Constructor ctor = null; + try { + ctor = clazz.get().getConstructor(); + } catch (NoSuchMethodException e) { + return null; + } + try { + query = (Query) ctor.newInstance(); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { + return null; + } + } else { + query = new BasicOperator(); + } + + 
query.setNodeType(type); + query.setL(codec.treeToValue(node.get("l"), Query.class)); + query.setR(codec.treeToValue(node.get("r"), Query.class)); + query.setValue(node.hasNonNull("value") ? node.get("value").textValue() : null); + + return query; + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/QueryModule.java b/src/main/java/bio/terra/cda/app/operators/QueryModule.java new file mode 100644 index 00000000..ea374437 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/QueryModule.java @@ -0,0 +1,12 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.generated.model.Query; +import com.fasterxml.jackson.databind.module.SimpleModule; + +public class QueryModule extends SimpleModule { + public QueryModule() { + super(); + + addDeserializer(Query.class, new OperatorDeserializer()); + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/QueryOperator.java b/src/main/java/bio/terra/cda/app/operators/QueryOperator.java new file mode 100644 index 00000000..48a25824 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/QueryOperator.java @@ -0,0 +1,13 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.generated.model.Query; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface QueryOperator { + public Query.NodeTypeEnum nodeType(); +} diff --git a/src/main/java/bio/terra/cda/app/operators/Quoted.java b/src/main/java/bio/terra/cda/app/operators/Quoted.java new file mode 100644 index 00000000..65f2c9f4 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/Quoted.java @@ -0,0 +1,28 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Map; +import java.util.stream.Stream; + +@QueryOperator(nodeType = 
Query.NodeTypeEnum.QUOTED) +public class Quoted extends BasicOperator { + @Override + public Stream getUnnestColumns( + String table, Map tableSchemaMap) { + return Stream.empty(); + } + + @Override + public String queryString( + String table, Map tableSchemaMap) { + String value = getValue(); + // Int check + if (value.contains("days_to_birth") + || value.contains("age_at_death") + || value.contains("age_")) { + return String.format("'%s'", value); + } + return String.format("UPPER('%s')", value); + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/Select.java b/src/main/java/bio/terra/cda/app/operators/Select.java new file mode 100644 index 00000000..c659a2f9 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/Select.java @@ -0,0 +1,14 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Map; + +@QueryOperator(nodeType = Query.NodeTypeEnum.SELECT) +public class Select extends BasicOperator { + @Override + public String queryString(String table, Map tableSchemaMap) + throws IllegalArgumentException { + return ((BasicOperator) getR()).queryString(table, tableSchemaMap); + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/SelectValues.java b/src/main/java/bio/terra/cda/app/operators/SelectValues.java new file mode 100644 index 00000000..84b94d0f --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/SelectValues.java @@ -0,0 +1,25 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.SqlUtil; +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Arrays; +import java.util.Map; +import java.util.stream.Stream; + +@QueryOperator(nodeType = Query.NodeTypeEnum.SELECTVALUES) +public class SelectValues extends BasicOperator { + @Override + public Stream getUnnestColumns( + String table, Map tableSchemaMap) + throws IllegalArgumentException { + return 
Arrays.stream(getValue().split(",")) + .flatMap(select -> SqlUtil.getUnnestsFromParts(table, select.trim().split("\\."), false)); + } + + @Override + public String queryString(String table, Map tableSchemaMap) + throws IllegalArgumentException { + return ""; + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/SingleSidedOperator.java b/src/main/java/bio/terra/cda/app/operators/SingleSidedOperator.java new file mode 100644 index 00000000..b871078e --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/SingleSidedOperator.java @@ -0,0 +1,14 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.TableSchema; +import java.util.Map; +import java.util.stream.Stream; + +public class SingleSidedOperator extends BasicOperator { + @Override + public Stream getUnnestColumns( + String table, Map tableSchemaMap) + throws IllegalArgumentException { + return ((BasicOperator) getL()).getUnnestColumns(table, tableSchemaMap); + } +} diff --git a/src/main/java/bio/terra/cda/app/operators/Unquoted.java b/src/main/java/bio/terra/cda/app/operators/Unquoted.java new file mode 100644 index 00000000..45286333 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/operators/Unquoted.java @@ -0,0 +1,22 @@ +package bio.terra.cda.app.operators; + +import bio.terra.cda.app.util.TableSchema; +import bio.terra.cda.generated.model.Query; +import java.util.Map; +import java.util.stream.Stream; + +@QueryOperator(nodeType = Query.NodeTypeEnum.UNQUOTED) +public class Unquoted extends BasicOperator { + @Override + public Stream getUnnestColumns( + String table, Map tableSchemaMap) + throws IllegalArgumentException { + return Stream.empty(); + } + + @Override + public String queryString(String table, Map tableSchemaMap) + throws IllegalArgumentException { + return String.format("%s", getValue()); + } +} diff --git a/src/main/java/bio/terra/cda/app/service/QueryService.java b/src/main/java/bio/terra/cda/app/service/QueryService.java index e752fa05..5260e606 100644 --- 
a/src/main/java/bio/terra/cda/app/service/QueryService.java +++ b/src/main/java/bio/terra/cda/app/service/QueryService.java @@ -2,6 +2,7 @@ import static java.lang.Thread.currentThread; +import bio.terra.cda.app.configuration.ApplicationConfiguration; import bio.terra.cda.app.service.exception.BadQueryException; import bio.terra.cda.generated.model.JobStatusData; import bio.terra.cda.generated.model.SystemStatus; @@ -16,10 +17,13 @@ import com.google.cloud.bigquery.*; import com.google.common.annotations.VisibleForTesting; import java.text.SimpleDateFormat; +import java.time.Instant; import java.util.*; +import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.cache.annotation.CacheConfig; import org.springframework.cache.annotation.CacheEvict; import org.springframework.cache.annotation.Cacheable; @@ -28,14 +32,21 @@ @Component @CacheConfig(cacheNames = "system-status") public class QueryService { + @Autowired public ApplicationConfiguration applicationConfiguration; + Map jobCreationStatus = new HashMap<>(); - private static final Logger logger = LoggerFactory.getLogger(QueryService.class); + @Value("${project}") + private String project; + + @Value("${bqTable:default}") + private String bqTable; - final BigQuery bigQuery = - BigQueryOptions.newBuilder().setProjectId("gdc-bq-sample").build().getService(); + private static final Logger logger = LoggerFactory.getLogger(QueryService.class); private final ObjectMapper objectMapper; + @Autowired private BigQuery bigQuery; + @Autowired public QueryService(ObjectMapper objectMapper) { this.objectMapper = objectMapper; @@ -53,8 +64,8 @@ public SystemStatus bigQueryCheck() { SystemStatusSystems bigQuerySystemStatus = new SystemStatusSystems(); boolean success = false; try { - String statusCheck = 
bigQuery.getDataset("cda_mvp").getDatasetId().getDataset(); - success = statusCheck.equals("cda_mvp"); + String statusCheck = bigQuery.getDataset(bqTable).getDatasetId().getDataset(); + success = statusCheck.equals(bqTable); } catch (Exception e) { logger.error("Status check failed ", e); } @@ -65,7 +76,11 @@ public SystemStatus bigQueryCheck() { bigQuerySystemStatus .ok(false) .addMessagesItem( - "BiqQuery Status check has indicated the 'cda_mvp' dataset is currently unreachable from the Service API"); + "PROJECT: " + + project + + " - BiqQuery Status check has indicated the '" + + bqTable + + "' dataset is currently unreachable from the Service API"); } systemStatus .ok(bigQuerySystemStatus.getOk()) @@ -73,6 +88,7 @@ public SystemStatus bigQueryCheck() { return systemStatus; } + /** * Convert a BQ value to a json node. * @@ -128,6 +144,10 @@ public QueryResult getQueryResults(String queryId, int offset, int pageSize) { return getJobResults(job, offset, pageSize); } + public void setBigQuery(BigQuery bigQuery) { + this.bigQuery = bigQuery; + } + public static class QueryResult { public final List items; public final Long totalRowCount; @@ -165,8 +185,10 @@ private QueryResult getJobResults(Job queryJob, int offset, int pageSize) { FieldValue.of(FieldValue.Attribute.RECORD, row), Field.of("root", LegacySQLTypeName.RECORD, fields))); - // This check is required because pageSize is the number of rows BQ retrieves at a time, - // not the total number of rows returned by iterateAll(). Without this check, this loop + // This check is required because pageSize is the number of rows BQ retrieves at + // a time, + // not the total number of rows returned by iterateAll(). Without this check, + // this loop // would return all rows in the result table. if (++rowCount == pageSize) { break; @@ -200,8 +222,10 @@ private static class QueryData { } } - // For now, hardcode the known list of systems. 
In the future, we will get this from the - // database itself, as each published dataset will have a list of contributing systems. + // For now, hardcode the known list of systems. In the future, we will get this + // from the + // database itself, as each published dataset will have a list of contributing + // systems. private enum Source { GDC, PDC @@ -231,7 +255,8 @@ private Map generateUsageData(List jsonData) { } private static String getSqlFromJob(Job queryJob) { - // This cast is safe because it's only done on queries that have been generated using + // This cast is safe because it's only done on queries that have been generated + // using // startQuery() below. return ((QueryJobConfiguration) queryJob.getConfiguration()).getQuery(); } @@ -254,7 +279,8 @@ private void logQuery(Job queryJob, List jsonData) { // Log usage data for this response. final Map resultsCount = generateUsageData(jsonData); float elapsed = 0; - // In some cases endTime is null, even though startTime and creationTime are non-null and the + // In some cases endTime is null, even though startTime and creationTime are + // non-null and the // job is complete. 
if (queryJob.getStatistics().getEndTime() != null && queryJob.getStatistics().getStartTime() != null) { @@ -271,12 +297,29 @@ private void logQuery(Job queryJob, List jsonData) { } public String startQuery(String query) { - var queryConfig = QueryJobConfiguration.newBuilder(query).setUseLegacySql(false); - + String jobID = UUID.randomUUID().toString(); + String destinationDataset = "Job_Queue"; + String destinationTable = String.format("Job_%s", jobID); + TableId tableId = TableId.of(destinationDataset, destinationTable); + TableDefinition tableDefinition = StandardTableDefinition.of(Schema.of()); + TableInfo tableInfo = + TableInfo.newBuilder(tableId, tableDefinition) + .setExpirationTime(Instant.now().toEpochMilli() + TimeUnit.MINUTES.toMillis(10)) + .build(); + + QueryJobConfiguration.Builder queryConfig = + QueryJobConfiguration.newBuilder(query) + .setUseLegacySql(false) + .setUseQueryCache(true) + .setAllowLargeResults(true) + .setDestinationTable(tableInfo.getTableId()); // Create a job ID so that we can safely retry. - JobId jobId = JobId.of(UUID.randomUUID().toString()); + JobId jobId = JobId.of(jobID); + /** + * Biguery has a maximum wait time by default of 10 seconds this will update the max time to + * 1min. + */ Job queryJob = bigQuery.create(JobInfo.newBuilder(queryConfig.build()).setJobId(jobId).build()); - return queryJob.getJobId().getJob(); } } diff --git a/src/main/java/bio/terra/cda/app/util/QueryTranslator.java b/src/main/java/bio/terra/cda/app/util/QueryTranslator.java deleted file mode 100644 index a7baa43b..00000000 --- a/src/main/java/bio/terra/cda/app/util/QueryTranslator.java +++ /dev/null @@ -1,97 +0,0 @@ -package bio.terra.cda.app.util; - -import bio.terra.cda.generated.model.Query; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.Stream; - -/** Class to translate the endpoint Query object to a Big Query query string. 
*/ -public class QueryTranslator { - - /** - * Create a SQL query string given a table (or subquery) and a Query object. - * - * @param table the table to use as the first element of the FROM clause - * @param query the Query object - * @return a SQL query string - */ - public static String sql(String table, Query query) { - return new SqlGenerator(table, query).generate(); - } - - // A convenience class to avoid having to pass 'table' around to all the methods. - private static class SqlGenerator { - final String qualifiedTable; - final Query rootQuery; - final String table; - - private SqlGenerator(String qualifiedTable, Query rootQuery) { - this.qualifiedTable = qualifiedTable; - this.rootQuery = rootQuery; - int dotPos = qualifiedTable.lastIndexOf('.'); - this.table = dotPos == -1 ? qualifiedTable : qualifiedTable.substring(dotPos + 1); - } - - private String generate() { - return sql(qualifiedTable, rootQuery); - } - - private String sql(String tableOrSubClause, Query query) { - if (query.getNodeType() == Query.NodeTypeEnum.SUBQUERY) { - // A SUBQUERY is built differently from other queries. The FROM clause is the SQL version of - // the right subtree, instead of using table. The left subtree is now the top level query. - return sql(String.format("(%s)", sql(tableOrSubClause, query.getR())), query.getL()); - } - var fromClause = - Stream.concat( - Stream.of(tableOrSubClause + " AS " + table), getUnnestColumns(query).distinct()) - .collect(Collectors.joining(", ")); - - var condition = queryString(query); - return String.format("SELECT %s.* FROM %s WHERE %s", table, fromClause, condition); - } - - private Stream getUnnestColumns(Query query) { - switch (query.getNodeType()) { - case QUOTED: - case UNQUOTED: - return Stream.empty(); - case COLUMN: - var parts = query.getValue().split("\\."); - return IntStream.range(0, parts.length - 1) - .mapToObj( - i -> - i == 0 - ? 
String.format("UNNEST(%1$s) AS _%1$s", parts[i]) - : String.format("UNNEST(_%1$s.%2$s) AS _%2$s", parts[i - 1], parts[i])); - case NOT: - return getUnnestColumns(query.getL()); - default: - return Stream.concat(getUnnestColumns(query.getL()), getUnnestColumns(query.getR())); - } - } - - private String queryString(Query query) { - switch (query.getNodeType()) { - case QUOTED: - return String.format("'%s'", query.getValue()); - case UNQUOTED: - return String.format("%s", query.getValue()); - case COLUMN: - var parts = query.getValue().split("\\."); - if (parts.length > 1) { - return String.format("_%s.%s", parts[parts.length - 2], parts[parts.length - 1]); - } - // Top level fields must be scoped by the table name, otherwise they could conflict with - // unnested fields. - return String.format("%s.%s", table, query.getValue()); - case NOT: - return String.format("(%s %s)", query.getNodeType(), queryString(query.getL())); - default: - return String.format( - "(%s %s %s)", - queryString(query.getL()), query.getNodeType(), queryString(query.getR())); - } - } - } -} diff --git a/src/main/java/bio/terra/cda/app/util/SqlUtil.java b/src/main/java/bio/terra/cda/app/util/SqlUtil.java new file mode 100644 index 00000000..dd7b95bc --- /dev/null +++ b/src/main/java/bio/terra/cda/app/util/SqlUtil.java @@ -0,0 +1,27 @@ +package bio.terra.cda.app.util; + +import java.util.Arrays; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +public class SqlUtil { + private SqlUtil() {} + + public static Stream getUnnestsFromParts( + String table, String[] parts, boolean includeLast) { + return IntStream.range(0, parts.length - (includeLast ? 0 : 1)) + .mapToObj( + i -> + i == 0 + ? 
String.format( + "UNNEST(%1$s.%2$s) AS %3$s", table, parts[i], getAlias(i, parts)) + : String.format( + "UNNEST(%1$s.%2$s) AS %3$s", + getAlias(i - 1, parts), parts[i], getAlias(i, parts))); + } + + public static String getAlias(Integer index, String[] parts) { + return "_" + Arrays.stream(parts, 0, index + 1).collect(Collectors.joining("_")); + } +} diff --git a/src/main/java/bio/terra/cda/app/util/TableSchema.java b/src/main/java/bio/terra/cda/app/util/TableSchema.java new file mode 100644 index 00000000..bcdbfe30 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/util/TableSchema.java @@ -0,0 +1,172 @@ +package bio.terra.cda.app.util; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.type.CollectionType; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.springframework.core.io.ClassPathResource; + +public class TableSchema { + public static class SchemaDefinition { + private String mode; + private String name; + private String type; + private String description; + private SchemaDefinition[] fields; + + public String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public void setFields(SchemaDefinition[] fields) { + this.fields = fields; + } + + public SchemaDefinition[] getFields() { + return this.fields; + } + + public void setDescription(String description) { + this.description = description; + } + 
+ public String getDescription() { + return this.description; + } + } + + private TableSchema() {} + + public static List getSchema(String version) throws IOException { + return loadSchemaFromFile(getFileName(version)); + } + + public static Map buildSchemaMap(List definitions) { + Map definitionMap = new HashMap<>(); + addToMap("", definitions, definitionMap); + return definitionMap; + } + + public static List getSchemaByColumnName( + List definitions, String columnName) { + List newSchema = new ArrayList<>(); + + definitions.forEach(def -> hasColumn(def, columnName).ifPresent(newSchema::add)); + + return newSchema; + } + + public static List supportedSchemas() throws IOException { + ClassLoader classLoader = TableSchema.class.getClassLoader(); + + URL resource = classLoader.getResource("schema"); + + if (resource == null) { + throw new IOException("Schema does not exist"); + } + + try (Stream fileStream = Files.walk(Paths.get(resource.toURI()))) { + return fileStream + .filter(path -> path.getFileName().toString().endsWith(".json")) + .map( + path -> { + var file = path.getFileName().toString(); + return file.substring(0, file.length() - 5).toLowerCase(); + }) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new IOException(e.getMessage()); + } + } + + private static Optional hasColumn( + SchemaDefinition definition, String columnName) { + SchemaDefinition newDef = new SchemaDefinition(); + newDef.setDescription(definition.getDescription()); + newDef.setMode(definition.getMode()); + newDef.setName(definition.getName()); + newDef.setType(definition.getType()); + + if (newDef.getName().equals(columnName)) { + return Optional.of(newDef); + } + + if (definition.getFields() == null) { + return Optional.empty(); + } + + List newFields = new ArrayList<>(); + Arrays.stream(definition.getFields()) + .forEach(def -> hasColumn(def, columnName).ifPresent(newFields::add)); + + if (newFields.isEmpty()) { + return Optional.empty(); + } + + SchemaDefinition[] 
fields = new SchemaDefinition[newFields.size()]; + newDef.setFields(newFields.toArray(fields)); + + return Optional.of(newDef); + } + + private static String getFileName(String version) { + return String.format("schema/%s.json", version); + } + + private static List loadSchemaFromFile(String fileName) throws IOException { + ClassPathResource resource = new ClassPathResource(fileName); + InputStream inputStream = resource.getInputStream(); + ObjectMapper mapper = new ObjectMapper(); + CollectionType collectionType = + mapper.getTypeFactory().constructCollectionType(List.class, SchemaDefinition.class); + + return mapper.readValue(inputStream, collectionType); + } + + private static void addToMap( + String prefix, + List definitions, + Map definitionMap) { + definitions.forEach( + definition -> { + var mapName = + prefix.isEmpty() ? definition.name : String.format("%s.%s", prefix, definition.name); + definitionMap.put(mapName, definition); + if (definition.type.equals("RECORD")) { + addToMap(mapName, List.of(definition.fields), definitionMap); + } + }); + } +} diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index cfa8b0b0..1bf300a1 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -1,16 +1,17 @@ openapi: 3.0.3 info: description: API definition for the CDA - version: 2.0.0 + version: 2.1.0 title: CDA API license: name: Apache 2.0 - url: 'http://www.apache.org/licenses/LICENSE-2.0.html' + url: "http://www.apache.org/licenses/LICENSE-2.0.html" servers: + - url: https://cancerdata.dsde-dev.broadinstitute.org - url: https://cda.cda-dev.broadinstitute.org - url: http://localhost:8080 - - url: http://34.71.0.127:8080 + - url: http://35.192.60.10:8080 tags: - name: query @@ -28,10 +29,9 @@ paths: - meta responses: 200: - $ref: '#/components/responses/StatusResponse' + $ref: "#/components/responses/StatusResponse" 500: - $ref: 
'#/components/responses/StatusResponse' - + $ref: "#/components/responses/StatusResponse" /api/v1/dataset-description/latest: get: @@ -46,8 +46,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/DatasetDescription' - + $ref: "#/components/schemas/DatasetDescription" /api/v1/dataset-description/all: get: @@ -64,8 +63,7 @@ paths: schema: type: array items: - $ref: '#/components/schemas/DatasetDescription' - + $ref: "#/components/schemas/DatasetDescription" /api/v1/bulk-data/{version}: get: @@ -75,11 +73,11 @@ paths: tags: - query parameters: - - $ref: '#/components/parameters/DatasetVersion' - + - $ref: "#/components/parameters/DatasetVersion" + - $ref: "#/components/parameters/Table" responses: 200: - $ref: '#/components/responses/QueryCreated' + $ref: "#/components/responses/QueryCreated" /api/v1/boolean-query/{version}: @@ -93,14 +91,15 @@ paths: - query parameters: - - $ref: '#/components/parameters/DatasetVersion' + - $ref: "#/components/parameters/DatasetVersion" - in: query description: If true, don't run the query, only generate and return it. 
name: dryRun schema: type: boolean default: false - - $ref: '#/components/parameters/Table' + - $ref: "#/components/parameters/Table" + requestBody: description: The boolean query @@ -108,11 +107,11 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Query' + $ref: "#/components/schemas/Query" responses: 200: - $ref: '#/components/responses/QueryCreated' + $ref: "#/components/responses/QueryCreated" /api/v1/sql-query: post: @@ -131,7 +130,7 @@ paths: responses: 200: - $ref: '#/components/responses/QueryCreated' + $ref: "#/components/responses/QueryCreated" /api/v1/query/{id}: get: @@ -145,13 +144,13 @@ paths: - query parameters: - - $ref: '#/components/parameters/QueryId' - - $ref: '#/components/parameters/ResultOffset' - - $ref: '#/components/parameters/ResultLimit' + - $ref: "#/components/parameters/QueryId" + - $ref: "#/components/parameters/ResultOffset" + - $ref: "#/components/parameters/ResultLimit" responses: 200: - $ref: '#/components/responses/QueryResponse' + $ref: "#/components/responses/QueryResponse" /api/v1/job-status/{id}: get: @@ -165,11 +164,11 @@ paths: - query parameters: - - $ref: '#/components/parameters/QueryId' + - $ref: "#/components/parameters/QueryId" responses: 200: - $ref: '#/components/responses/JobStatusResponse' + $ref: "#/components/responses/JobStatusResponse" /api/v1/unique-values/{version}: post: @@ -180,9 +179,9 @@ paths: - query parameters: - - $ref: '#/components/parameters/DatasetVersion' - - $ref: '#/components/parameters/SystemValue' - - $ref: '#/components/parameters/Table' + - $ref: "#/components/parameters/DatasetVersion" + - $ref: "#/components/parameters/SystemValue" + - $ref: "#/components/parameters/Table" requestBody: description: column_name of table value being requested @@ -194,7 +193,7 @@ paths: responses: 200: - $ref: '#/components/responses/QueryCreated' + $ref: "#/components/responses/QueryCreated" /api/v1/columns/{version}: get: @@ -205,15 +204,73 @@ paths: - query parameters: - - $ref: 
'#/components/parameters/DatasetVersion' - - $ref: '#/components/parameters/Table' + - $ref: "#/components/parameters/DatasetVersion" + - $ref: "#/components/parameters/Table" responses: 200: - $ref: '#/components/responses/QueryCreated' + $ref: "#/components/responses/QueryCreated" -components: + /api/v1/global-counts/{version}: + post: + summary: Returns counts of the DCS + description: Return GlobalCounts for schema + operationId: GlobalCounts + tags: + - query + + parameters: + - $ref: "#/components/parameters/DatasetVersion" + - in: query + description: If true, don't run the query, only generate and return it. + name: dryRun + schema: + type: boolean + default: false + - $ref: "#/components/parameters/Table" + requestBody: + description: counts + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/Query" + responses: + 200: + $ref: "#/components/responses/QueryCreated" + + /api/v1/files/{version}: + post: + summary: Returns a list of files given a boolean query + description: Return list of files for given query + operationId: files + tags: + - query + + parameters: + - $ref: "#/components/parameters/DatasetVersion" + - in: query + description: If true, don't run the query, only generate and return it. 
+ name: dryRun + schema: + type: boolean + default: false + - $ref: "#/components/parameters/Table" + + requestBody: + description: The boolean query + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/Query" + + responses: + 200: + $ref: "#/components/responses/QueryCreated" + +components: parameters: DatasetVersion: in: path @@ -221,7 +278,7 @@ components: required: true schema: type: string - default: all_v2 + default: all_v2_1 description: Dataset version SystemValue: in: query @@ -234,7 +291,7 @@ components: name: table name schema: type: string - default: gdc-bq-sample.integration + default: broad-dsde-dev.cda_dev description: Filter on system for results ResultOffset: in: query @@ -262,12 +319,10 @@ components: name: table schema: type: string - default: gdc-bq-sample.integration + default: broad-dsde-dev.cda_dev description: tablename - schemas: - ErrorReport: type: object properties: @@ -332,17 +387,17 @@ components: type: object properties: release-date: - $ref: '#/components/schemas/DateType' + $ref: "#/components/schemas/DateType" cda-version: type: string cda-model: - $ref: '#/components/schemas/Model' + $ref: "#/components/schemas/Model" notes: type: string datasets: type: array items: - $ref: '#/components/schemas/DatasetInfo' + $ref: "#/components/schemas/DatasetInfo" DateType: type: string @@ -355,7 +410,7 @@ components: version: type: string date: - $ref: '#/components/schemas/DateType' + $ref: "#/components/schemas/DateType" Model: type: object @@ -363,7 +418,7 @@ components: version: type: string date: - $ref: '#/components/schemas/DateType' + $ref: "#/components/schemas/DateType" model: type: object properties: {} @@ -404,7 +459,27 @@ components: properties: node_type: type: string - enum: [column, quoted, unquoted, ">=", "<=", "<", ">", "=", "!=", AND, OR, NOT, SUBQUERY,WHERE] + enum: + [ + column, + quoted, + unquoted, + ">=", + "<=", + "<", + ">", + "=", + "!=", + AND, + OR, + NOT, + SUBQUERY, + WHERE, + 
IN, + SELECTVALUES, + SELECT, + LIKE + ] description: > Query contents and behavior depends on `node_type`: * `column` - column name is in `value` @@ -416,36 +491,35 @@ components: value: type: string l: - $ref: '#/components/schemas/Query' + $ref: "#/components/schemas/Query" r: - $ref: '#/components/schemas/Query' + $ref: "#/components/schemas/Query" responses: - StatusResponse: description: common status response content: application/json: schema: - $ref: '#/components/schemas/SystemStatus' + $ref: "#/components/schemas/SystemStatus" QueryCreated: description: query created response content: application/json: schema: - $ref: '#/components/schemas/QueryCreatedData' + $ref: "#/components/schemas/QueryCreatedData" QueryResponse: description: query data response content: application/json: schema: - $ref: '#/components/schemas/QueryResponseData' + $ref: "#/components/schemas/QueryResponseData" JobStatusResponse: description: Get's Bigquery job id status content: application/json: schema: - $ref: '#/components/schemas/JobStatusData' + $ref: "#/components/schemas/JobStatusData" diff --git a/src/main/resources/application-dev.properties b/src/main/resources/application-dev.properties new file mode 100644 index 00000000..6c7758ad --- /dev/null +++ b/src/main/resources/application-dev.properties @@ -0,0 +1,7 @@ +server.port=8080 +cda.bqTable=broad-dsde-dev.cda_dev +cda.datasetVersion=all_v2_1 +project=broad-dsde-dev +bqTable=cda_dev + +server.connection-timeout=300s \ No newline at end of file diff --git a/src/main/resources/application-local.properties b/src/main/resources/application-local.properties new file mode 100644 index 00000000..d5071807 --- /dev/null +++ b/src/main/resources/application-local.properties @@ -0,0 +1,8 @@ +server.port=8080 +cda.bqTable=gdc-bq-sample.integration +cda.datasetVersion=gdc_pdc_idc_v2_1 +project=gdc-bq-sample +bqTable=integration +datasetVersion=gdc_pdc_idc_v2_1 + +server.connection-timeout=300s diff --git 
a/src/main/resources/application-prod.properties b/src/main/resources/application-prod.properties new file mode 100644 index 00000000..eef98b72 --- /dev/null +++ b/src/main/resources/application-prod.properties @@ -0,0 +1,8 @@ +server.port=8080 +cda.bqTable=broad-dsde-dev.cda_dev +cda.datasetVersion=all_v2 +project=broad-dsde-dev +bqTable=cda_dev +datasetVersion=all_v2_1 + +server.connection-timeout=300s \ No newline at end of file diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 5a47e9ac..55d3aece 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,3 +1,7 @@ server.port=8080 -cda.bqTable=gdc-bq-sample.integration -cda.datasetVersion=all_v2 +cda.bqTable=broad-dsde-dev.cda_dev +cda.datasetVersion=all_v2_1 +project=broad-dsde-dev +bqTable=cda_dev + +server.connection-timeout=300s diff --git a/src/main/resources/schema/GDC_Files.json b/src/main/resources/schema/GDC_Files.json new file mode 100644 index 00000000..b669b699 --- /dev/null +++ b/src/main/resources/schema/GDC_Files.json @@ -0,0 +1,474 @@ +[ + { + "description": "The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). 
", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "Short name or abbreviation for dataset. Maps to rdfs:label.", + "mode": "NULLABLE", + "name": "label", + "type": "STRING" + }, + { + "description": " ", + "mode": "NULLABLE", + "name": "data_category", + "type": "STRING" + }, + { + "description": " ", + "mode": "NULLABLE", + "name": "data_type", + "type": "STRING" + }, + { + "description": "String to identify the full file extension including compression extensions.", + "mode": "NULLABLE", + "name": "file_format", + "type": "STRING" + }, + { + "description": "A reference to the Project(s) of which this ResearchSubject is a member. The associated_project may be embedded using the $ref definition or may be a reference to the id for the Project - or a URI expressed as a string to an existing entity.", + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "drs_uri", + "type": "STRING" + }, + { + "description": "Size of the file in bytes. Maps to dcat:byteSize.", + "mode": "NULLABLE", + "name": "byte_size", + "type": "INTEGER" + }, + { + "description": " ", + "mode": "NULLABLE", + "name": "checksum", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "data_modality", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "imaging_modality", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "dbgap_accession_number", + "type": "STRING" + }, + { + "description": "A subject entity captures the study-independent metadata for research subjects. 
Human research subjects are usually not traceable to a particular person to protect the subject\u2019s privacy.", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity. ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "species", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "sex", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "race", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "ethnicity", + "type": "STRING" + }, + { + "description": "Per GDC Dictionary, number of days between the date used for index and the date from a person's date of birth represented as a calculated negative number of days.", + "mode": "NULLABLE", + "name": "days_to_birth", + "type": "INTEGER" + }, + { + "description": "", + "mode": "REPEATED", + "name": "subject_associated_project", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "vital_status", + "type": "STRING" + }, + { + 
"description": "", + "mode": "NULLABLE", + "name": "age_at_death", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "cause_of_death", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "Subject", + "type": "RECORD" + }, + { + "description": "A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subject\u2019s privacy. This entity plays the role of the case_id in existing data.", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For CDA, this is case_id.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity. 
", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "member_of_research_project", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis_condition", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis_site", + "type": "STRING" + }, + { + "description": "", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). 
", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "age_at_diagnosis", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "morphology", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "stage", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "grade", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "method_of_diagnosis", + "type": "STRING" + }, + { + "description": "", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). 
", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "Text name for treatment type; this will ultimately be defined by a common vocabulary", + "mode": "NULLABLE", + "name": "treatment_type", + "type": "STRING" + }, + { + "description": "Text name for treatment outcome; this will ultimately be defined by a common vocabulary", + "mode": "NULLABLE", + "name": "treatment_outcome", + "type": "STRING" + }, + { + "description": "The date and optionally time that the treatment was started in integer.", + "mode": "NULLABLE", + "name": "days_to_treatment_start", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "days_to_treatment_end", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "therapeutic_agent", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_anatomic_site", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_effect", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_end_reason", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "number_of_cycles", + "type": "INTEGER" + } + ], + "mode": "REPEATED", + "name": "Treatment", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Diagnosis", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "ResearchSubject", + "type": "RECORD" + }, + { + "description": "Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. 
Specimens are usually collected as an example of their kind, often for use in some investigation.", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "description": "The age of the Patient when this sample was taken.", + "mode": "NULLABLE", + "name": "age_at_collection", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_disease_type", + "type": "STRING" + }, + { + "description": "Per GDC Dictionary, the text term that represents the name of the primary disease site of the submitted tumor sample; recommend dropping tumor; biospecimen_anatomic_site.", + "mode": "NULLABLE", + "name": "anatomical_site", + "type": "STRING" + }, + { + "description": "The general kind of material from which the specimen was derived, indicating the physical nature of the source material. ", + "mode": "NULLABLE", + "name": "source_material_type", + "type": "STRING" + }, + { + "description": "The high-level type of the specimen, based on how it has been derived from the original extracted sample. 
\n", + "mode": "NULLABLE", + "name": "specimen_type", + "type": "STRING" + }, + { + "description": "A source/parent specimen from which this one was directly derived.", + "mode": "NULLABLE", + "name": "derived_from_specimen", + "type": "STRING" + }, + { + "description": "The Patient/ResearchSubject, or Biologically Derived Material (e.g. a cell line, tissue culture, organoid) from which the specimen was directly or indirectly derived.", + "mode": "NULLABLE", + "name": "derived_from_subject", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "Specimen", + "type": "RECORD" + } +] diff --git a/src/main/resources/schema/GDC_Subjects.json b/src/main/resources/schema/GDC_Subjects.json new file mode 100644 index 00000000..1ed3d04c --- /dev/null +++ b/src/main/resources/schema/GDC_Subjects.json @@ -0,0 +1,392 @@ +[ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity. 
", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "species", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "sex", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "race", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "ethnicity", + "type": "STRING" + }, + { + "description": "Per GDC Dictionary, number of days between the date used for index and the date from a person's date of birth represented as a calculated negative number of days.", + "mode": "NULLABLE", + "name": "days_to_birth", + "type": "INTEGER" + }, + { + "description": "", + "mode": "REPEATED", + "name": "subject_associated_project", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "vital_status", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "age_at_death", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "cause_of_death", + "type": "STRING" + }, + { + "description": "List of ids of File entities associated with the Patient", + "mode": "REPEATED", + "name": "Files", + "type": "STRING" + }, + { + "description": "A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subject\u2019s privacy. 
This entity plays the role of the case_id in existing data.", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For CDA, this is case_id.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity. ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "member_of_research_project", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis_condition", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis_site", + "type": "STRING" + }, + { + "description": "List of ids of File entities associated with the ResearchSubject", + "mode": "REPEATED", + "name": "Files", + "type": "STRING" + }, + { + "description": "", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. 
The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "age_at_diagnosis", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "morphology", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "stage", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "grade", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "method_of_diagnosis", + "type": "STRING" + }, + { + "description": "", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). 
", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "Text name for treatment type; this will ultimately be defined by a common vocabulary", + "mode": "NULLABLE", + "name": "treatment_type", + "type": "STRING" + }, + { + "description": "Text name for treatment outcome; this will ultimately be defined by a common vocabulary", + "mode": "NULLABLE", + "name": "treatment_outcome", + "type": "STRING" + }, + { + "description": "The date and optionally time that the treatment was started in integer.", + "mode": "NULLABLE", + "name": "days_to_treatment_start", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "days_to_treatment_end", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "therapeutic_agent", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_anatomic_site", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_effect", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_end_reason", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "number_of_cycles", + "type": "INTEGER" + } + ], + "mode": "REPEATED", + "name": "Treatment", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Diagnosis", + "type": "RECORD" + }, + { + "description": "Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. 
Specimens are usually collected as an example of their kind, often for use in some investigation.", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "description": "The age of the Patient when this sample was taken.", + "mode": "NULLABLE", + "name": "age_at_collection", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_disease_type", + "type": "STRING" + }, + { + "description": "Per GDC Dictionary, the text term that represents the name of the primary disease site of the submitted tumor sample; recommend dropping tumor; biospecimen_anatomic_site.", + "mode": "NULLABLE", + "name": "anatomical_site", + "type": "STRING" + }, + { + "description": "The general kind of material from which the specimen was derived, indicating the physical nature of the source material. ", + "mode": "NULLABLE", + "name": "source_material_type", + "type": "STRING" + }, + { + "description": "The high-level type of the specimen, based on how it has been derived from the original extracted sample. 
\n", + "mode": "NULLABLE", + "name": "specimen_type", + "type": "STRING" + }, + { + "description": "A source/parent specimen from which this one was directly derived.", + "mode": "NULLABLE", + "name": "derived_from_specimen", + "type": "STRING" + }, + { + "description": "The Patient/ResearchSubject, or Biologically Derived Material (e.g. a cell line, tissue culture, organoid) from which the specimen was directly or indirectly derived.", + "mode": "NULLABLE", + "name": "derived_from_subject", + "type": "STRING" + }, + { + "description": "List of ids of File entities associated with the Specimen", + "mode": "REPEATED", + "name": "Files", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "Specimen", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "ResearchSubject", + "type": "RECORD" + } +] diff --git a/src/main/resources/schema/GDC_Subjects_NoFiles.json b/src/main/resources/schema/GDC_Subjects_NoFiles.json new file mode 100644 index 00000000..ad3deb09 --- /dev/null +++ b/src/main/resources/schema/GDC_Subjects_NoFiles.json @@ -0,0 +1,315 @@ +[ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "species", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "sex", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "race", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "ethnicity", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_birth", + "type": "INTEGER" + }, + { + "mode": "REPEATED", + "name": "subject_associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "vital_status", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_death", + "type": "INTEGER" + }, + { + "mode": 
"NULLABLE", + "name": "cause_of_death", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "member_of_research_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_condition", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_site", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_diagnosis", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "morphology", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "stage", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "grade", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "method_of_diagnosis", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "treatment_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_outcome", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_start", + "type": "INTEGER" + }, + { + "mode": 
"NULLABLE", + "name": "days_to_treatment_end", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "therapeutic_agent", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_anatomic_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_effect", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_end_reason", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "number_of_cycles", + "type": "INTEGER" + } + ], + "mode": "REPEATED", + "name": "Treatment", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Diagnosis", + "type": "RECORD" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_collection", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "primary_disease_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "anatomical_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "source_material_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "specimen_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_specimen", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_subject", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "Specimen", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "ResearchSubject", + "type": "RECORD" + } +] diff --git a/src/main/resources/schema/all_v2_1.json b/src/main/resources/schema/all_v2_1.json new file mode 100644 index 00000000..30efae2f --- /dev/null +++ b/src/main/resources/schema/all_v2_1.json @@ -0,0 +1,567 @@ +[ + { + "mode": 
"NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "species", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "sex", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "race", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "ethnicity", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_birth", + "type": "INTEGER" + }, + { + "mode": "REPEATED", + "name": "subject_associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "vital_status", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_death", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "cause_of_death", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "label", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_category", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "file_format", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "drs_uri", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "byte_size", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "checksum", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": 
"imaging_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "dbgap_accession_number", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "File", + "type": "RECORD" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "member_of_research_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_condition", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_site", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_diagnosis", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "morphology", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "stage", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "grade", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "method_of_diagnosis", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "treatment_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": 
"treatment_outcome", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_start", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_end", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "therapeutic_agent", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_anatomic_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_effect", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_end_reason", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "number_of_cycles", + "type": "INTEGER" + } + ], + "mode": "REPEATED", + "name": "Treatment", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Diagnosis", + "type": "RECORD" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "label", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_category", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "file_format", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "drs_uri", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "byte_size", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "checksum", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "imaging_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "dbgap_accession_number", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "File", + "type": "RECORD" + }, + 
{ + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_collection", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "primary_disease_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "anatomical_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "source_material_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "specimen_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_specimen", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_subject", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "label", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_category", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "file_format", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "drs_uri", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "byte_size", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "checksum", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": 
"imaging_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "dbgap_accession_number", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "File", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Specimen", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "ResearchSubject", + "type": "RECORD" + } +] diff --git a/src/main/resources/schema/all_v3_0_Files.json b/src/main/resources/schema/all_v3_0_Files.json new file mode 100644 index 00000000..b669b699 --- /dev/null +++ b/src/main/resources/schema/all_v3_0_Files.json @@ -0,0 +1,474 @@ +[ + { + "description": "The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "Short name or abbreviation for dataset. 
Maps to rdfs:label.", + "mode": "NULLABLE", + "name": "label", + "type": "STRING" + }, + { + "description": " ", + "mode": "NULLABLE", + "name": "data_category", + "type": "STRING" + }, + { + "description": " ", + "mode": "NULLABLE", + "name": "data_type", + "type": "STRING" + }, + { + "description": "String to identify the full file extension including compression extensions.", + "mode": "NULLABLE", + "name": "file_format", + "type": "STRING" + }, + { + "description": "A reference to the Project(s) of which this ResearchSubject is a member. The associated_project may be embedded using the $ref definition or may be a reference to the id for the Project - or a URI expressed as a string to an existing entity.", + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "drs_uri", + "type": "STRING" + }, + { + "description": "Size of the file in bytes. Maps to dcat:byteSize.", + "mode": "NULLABLE", + "name": "byte_size", + "type": "INTEGER" + }, + { + "description": " ", + "mode": "NULLABLE", + "name": "checksum", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "data_modality", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "imaging_modality", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "dbgap_accession_number", + "type": "STRING" + }, + { + "description": "A subject entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subject\u2019s privacy.", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. 
The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity. ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "species", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "sex", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "race", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "ethnicity", + "type": "STRING" + }, + { + "description": "Per GDC Dictionary, number of days between the date used for index and the date from a person's date of birth represented as a calculated negative number of days.", + "mode": "NULLABLE", + "name": "days_to_birth", + "type": "INTEGER" + }, + { + "description": "", + "mode": "REPEATED", + "name": "subject_associated_project", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "vital_status", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "age_at_death", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "cause_of_death", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "Subject", + "type": "RECORD" + }, + { + 
"description": "A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subject\u2019s privacy. This entity plays the role of the case_id in existing data.", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For CDA, this is case_id.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity. ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "member_of_research_project", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis_condition", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis_site", + "type": "STRING" + }, + { + "description": "", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. 
The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_diagnosis", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "age_at_diagnosis", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "morphology", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "stage", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "grade", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "method_of_diagnosis", + "type": "STRING" + }, + { + "description": "", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). 
", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "Text name for treatment type; this will ultimately be defined by a common vocabulary", + "mode": "NULLABLE", + "name": "treatment_type", + "type": "STRING" + }, + { + "description": "Text name for treatment outcome; this will ultimately be defined by a common vocabulary", + "mode": "NULLABLE", + "name": "treatment_outcome", + "type": "STRING" + }, + { + "description": "The date and optionally time that the treatment was started in integer.", + "mode": "NULLABLE", + "name": "days_to_treatment_start", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "days_to_treatment_end", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "therapeutic_agent", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_anatomic_site", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_effect", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "treatment_end_reason", + "type": "STRING" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "number_of_cycles", + "type": "INTEGER" + } + ], + "mode": "REPEATED", + "name": "Treatment", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Diagnosis", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "ResearchSubject", + "type": "RECORD" + }, + { + "description": "Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. 
Specimens are usually collected as an example of their kind, often for use in some investigation.", + "fields": [ + { + "description": "The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.", + "mode": "REQUIRED", + "name": "id", + "type": "STRING" + }, + { + "description": "A 'business' identifier or accession number for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). ", + "fields": [ + { + "description": "The system or namespace that defines the identifier.", + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "description": "The value of the identifier, as defined by the system.", + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "description": "The age of the Patient when this sample was taken.", + "mode": "NULLABLE", + "name": "age_at_collection", + "type": "INTEGER" + }, + { + "description": "", + "mode": "NULLABLE", + "name": "primary_disease_type", + "type": "STRING" + }, + { + "description": "Per GDC Dictionary, the text term that represents the name of the primary disease site of the submitted tumor sample; recommend dropping tumor; biospecimen_anatomic_site.", + "mode": "NULLABLE", + "name": "anatomical_site", + "type": "STRING" + }, + { + "description": "The general kind of material from which the specimen was derived, indicating the physical nature of the source material. ", + "mode": "NULLABLE", + "name": "source_material_type", + "type": "STRING" + }, + { + "description": "The high-level type of the specimen, based on its how it has been derived from the original extracted sample. 
\n", + "mode": "NULLABLE", + "name": "specimen_type", + "type": "STRING" + }, + { + "description": "A source/parent specimen from which this one was directly derived.", + "mode": "NULLABLE", + "name": "derived_from_specimen", + "type": "STRING" + }, + { + "description": "The Patient/ResearchSubject, or Biologically Derived Materal (e.g. a cell line, tissue culture, organoid) from which the specimen was directly or indirectly derived.", + "mode": "NULLABLE", + "name": "derived_from_subject", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "Specimen", + "type": "RECORD" + } +] diff --git a/src/main/resources/schema/all_v3_0_subjects_meta.json b/src/main/resources/schema/all_v3_0_subjects_meta.json new file mode 100644 index 00000000..ad3deb09 --- /dev/null +++ b/src/main/resources/schema/all_v3_0_subjects_meta.json @@ -0,0 +1,315 @@ +[ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "species", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "sex", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "race", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "ethnicity", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_birth", + "type": "INTEGER" + }, + { + "mode": "REPEATED", + "name": "subject_associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "vital_status", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_death", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "cause_of_death", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" 
+ }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "member_of_research_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_condition", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_site", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_diagnosis", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "morphology", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "stage", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "grade", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "method_of_diagnosis", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "treatment_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_outcome", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_start", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_end", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "therapeutic_agent", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_anatomic_site", + "type": "STRING" + 
}, + { + "mode": "NULLABLE", + "name": "treatment_effect", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_end_reason", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "number_of_cycles", + "type": "INTEGER" + } + ], + "mode": "REPEATED", + "name": "Treatment", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Diagnosis", + "type": "RECORD" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_collection", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "primary_disease_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "anatomical_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "source_material_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "specimen_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_specimen", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_subject", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "Specimen", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "ResearchSubject", + "type": "RECORD" + } +] diff --git a/src/main/resources/schema/gdc_pdc_idc_v2_1.json b/src/main/resources/schema/gdc_pdc_idc_v2_1.json new file mode 100644 index 00000000..30efae2f --- /dev/null +++ b/src/main/resources/schema/gdc_pdc_idc_v2_1.json @@ -0,0 +1,567 @@ +[ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + 
"mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "species", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "sex", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "race", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "ethnicity", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_birth", + "type": "INTEGER" + }, + { + "mode": "REPEATED", + "name": "subject_associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "vital_status", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_death", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "cause_of_death", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "label", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_category", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "file_format", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "drs_uri", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "byte_size", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "checksum", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "imaging_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "dbgap_accession_number", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "File", + "type": "RECORD" + }, + { + "fields": [ + { + "mode": 
"NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "member_of_research_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_condition", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_site", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_diagnosis", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "morphology", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "stage", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "grade", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "method_of_diagnosis", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "treatment_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_outcome", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_start", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_end", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + 
"name": "therapeutic_agent", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_anatomic_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_effect", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_end_reason", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "number_of_cycles", + "type": "INTEGER" + } + ], + "mode": "REPEATED", + "name": "Treatment", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Diagnosis", + "type": "RECORD" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "label", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_category", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "file_format", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "drs_uri", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "byte_size", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "checksum", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "imaging_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "dbgap_accession_number", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "File", + "type": "RECORD" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + 
"mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_collection", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "primary_disease_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "anatomical_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "source_material_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "specimen_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_specimen", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_subject", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "label", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_category", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "file_format", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "drs_uri", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "byte_size", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "checksum", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "data_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "imaging_modality", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "dbgap_accession_number", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "File", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Specimen", + 
"type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "ResearchSubject", + "type": "RECORD" + } +] diff --git a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java index 110092fb..ed0cea3b 100644 --- a/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java +++ b/src/test/java/bio/terra/cda/app/controller/QueryApiControllerTest.java @@ -3,10 +3,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.only; -import static org.mockito.Mockito.reset; -import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.*; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; @@ -61,14 +58,14 @@ void booleanQueryDryRun() throws Exception { } @Test - public void uniqueValuesTest() throws Exception { - String version = "v3"; + void uniqueValuesTest() throws Exception { + String version = "all_v3_0_subjects_meta"; String system = "GDC"; String body = "sex"; - String table = "TABLE"; + String table = "default.dev"; var expected = - "SELECT DISTINCT sex FROM TABLE.v3, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE _identifier.system = 'GDC' ORDER BY sex"; + "SELECT DISTINCT sex FROM default.dev.all_v3_0_subjects_meta, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE _identifier.system = 'GDC' ORDER BY sex"; var result = mvc.perform( post("/api/v1/unique-values/{version}", version) diff --git a/src/test/java/bio/terra/cda/app/generators/SqlGeneratorTest.java b/src/test/java/bio/terra/cda/app/generators/SqlGeneratorTest.java new file mode 100644 index 
00000000..fb575a41 --- /dev/null +++ b/src/test/java/bio/terra/cda/app/generators/SqlGeneratorTest.java @@ -0,0 +1,80 @@ +package bio.terra.cda.app.generators; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import bio.terra.cda.app.operators.QueryModule; +import bio.terra.cda.generated.model.Query; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Stream; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +class SqlGeneratorTest { + + static final Path TEST_FILES = Paths.get("src/test/resources/query"); + + public static final String TABLE = "TABLE"; + public static final String QUALIFIED_TABLE = "GROUP." + TABLE; + + private final ObjectMapper objectMapper = new ObjectMapper().registerModule(new QueryModule()); + + private static Stream queryData() { + return Stream.of( + Arguments.of( + "query1.json", + QUALIFIED_TABLE, + TABLE, + "SELECT %2$s.* FROM %1$s AS %2$s WHERE (UPPER(%2$s.A) = UPPER('value'))"), + Arguments.of( + "query2.json", + QUALIFIED_TABLE, + TABLE, + "SELECT %2$s.* FROM %1$s AS %2$s, UNNEST(%2$s.B) AS _B, UNNEST(_B.BB) AS _B_BB, " + + "UNNEST(%2$s.A1) AS _A1 WHERE (((_B.BA >= 50) AND " + + "(UPPER(_B_BB.BBB) = UPPER('value'))) AND (UPPER(_A1.A1A) = UPPER('value')))"), + Arguments.of( + "query3.json", + QUALIFIED_TABLE, + TABLE, + "SELECT %2$s.* FROM %1$s AS %2$s, UNNEST(%2$s.B) AS _B, UNNEST(_B.BB) AS _B_BB, " + + "UNNEST(_B_BB.BBD) AS _B_BB_BBD, UNNEST(_B_BB_BBD.BBDD) AS _B_BB_BBD_BBDD WHERE (_B_BB_BBD_BBDD.BBDDE = 50)"), + Arguments.of( + "query-subquery.json", + "GROUP.all_v3_0_subjects_meta", + "all_v3_0_subjects_meta", + "SELECT %2$s.* FROM " + + "(SELECT %2$s.* FROM %1$s AS %2$s, UNNEST(%2$s.ResearchSubject) AS _ResearchSubject, " + + "UNNEST(_ResearchSubject.identifier) AS 
_ResearchSubject_identifier " + + "WHERE (UPPER(_ResearchSubject_identifier.system) = UPPER('PDC'))) AS %2$s," + + " UNNEST(%2$s.ResearchSubject) AS _ResearchSubject, " + + "UNNEST(_ResearchSubject.identifier) AS _ResearchSubject_identifier WHERE (UPPER(_ResearchSubject_identifier.system) = UPPER('GDC'))"), + Arguments.of( + "query-not.json", + QUALIFIED_TABLE, + TABLE, + "SELECT %2$s.* FROM %1$s AS %2$s, UNNEST(%2$s.A1) AS _A1 WHERE (NOT (1 = _A1.ANUM))"), + Arguments.of( + "query-ambiguous.json", + QUALIFIED_TABLE, + TABLE, + "SELECT %2$s.* FROM (SELECT %2$s.* FROM %1$s AS %2$s WHERE (UPPER(%2$s.A) = UPPER('that'))) AS %2$s WHERE (UPPER(%2$s.A) = UPPER('this'))")); + } + + @ParameterizedTest + @MethodSource("queryData") + void testQuery(String queryFile, String qualifiedTable, String table, String expectedQueryFormat) + throws Exception { + String jsonQuery = Files.readString(TEST_FILES.resolve(queryFile)); + + String expectedSql = String.format(expectedQueryFormat, qualifiedTable, table); + + Query query = objectMapper.readValue(jsonQuery, Query.class); + String translatedQuery = new SqlGenerator(qualifiedTable, query, table).generate(); + + assertEquals(expectedSql, translatedQuery); + } +} diff --git a/src/test/java/bio/terra/cda/app/service/QueryServiceTest.java b/src/test/java/bio/terra/cda/app/service/QueryServiceTest.java index 1c8c1ba8..f0c8a04f 100644 --- a/src/test/java/bio/terra/cda/app/service/QueryServiceTest.java +++ b/src/test/java/bio/terra/cda/app/service/QueryServiceTest.java @@ -12,10 +12,13 @@ import com.google.cloud.bigquery.StandardSQLTypeName; import java.util.List; import java.util.stream.Stream; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.springframework.stereotype.Component; +@Component class QueryServiceTest { private final QueryService queryService = new QueryService(new 
ObjectMapper()); @@ -48,6 +51,7 @@ private static Stream valueToJson() { "[123,456]")); } + @Disabled @ParameterizedTest @MethodSource("valueToJson") void testValueToJson(FieldValue value, Field field, String expected) { diff --git a/src/test/java/bio/terra/cda/app/util/NestedColumnTest.java b/src/test/java/bio/terra/cda/app/util/NestedColumnTest.java index c1d77bc7..55c9dc4a 100644 --- a/src/test/java/bio/terra/cda/app/util/NestedColumnTest.java +++ b/src/test/java/bio/terra/cda/app/util/NestedColumnTest.java @@ -22,7 +22,7 @@ private static Stream unnestData() { @ParameterizedTest @MethodSource("unnestData") - public void testGeneratedUnnestClause(String qualifiedName, String column, String clause) + void testGeneratedUnnestClause(String qualifiedName, String column, String clause) throws Exception { NestedColumn result = NestedColumn.generate(qualifiedName); StringBuffer unnestClause = new StringBuffer(); @@ -33,7 +33,7 @@ public void testGeneratedUnnestClause(String qualifiedName, String column, Strin } @Test - public void testIllegalArgCondition() throws Exception { + void testIllegalArgCondition() throws Exception { assertThrows(IllegalArgumentException.class, () -> NestedColumn.generate(null)); } } diff --git a/src/test/java/bio/terra/cda/app/util/QueryTranslatorTest.java b/src/test/java/bio/terra/cda/app/util/QueryTranslatorTest.java deleted file mode 100644 index c4007e7a..00000000 --- a/src/test/java/bio/terra/cda/app/util/QueryTranslatorTest.java +++ /dev/null @@ -1,118 +0,0 @@ -package bio.terra.cda.app.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import bio.terra.cda.generated.model.Query; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import org.junit.jupiter.api.Test; - -class QueryTranslatorTest { - - static final Path TEST_FILES = Paths.get("src/test/resources/query"); - - public static final String TABLE = "TABLE"; - public 
static final String QUALIFIED_TABLE = "GROUP." + TABLE; - - private final ObjectMapper objectMapper = new ObjectMapper(); - - @Test - public void testQuerySimple() throws Exception { - String jsonQuery = Files.readString(TEST_FILES.resolve("query1.json")); - - String expectedSql = - String.format( - "SELECT %2$s.* FROM %1$s AS %2$s WHERE (%2$s.project_id = 'TCGA-OV')", - QUALIFIED_TABLE, TABLE); - - Query query = objectMapper.readValue(jsonQuery, Query.class); - String translatedQuery = QueryTranslator.sql(QUALIFIED_TABLE, query); - - assertEquals(expectedSql, translatedQuery); - } - - @Test - public void testQueryComplex() throws Exception { - String jsonQuery = Files.readString(TEST_FILES.resolve("query2.json")); - - String EXPECTED_SQL = - String.format( - "SELECT %2$s.* FROM %1$s AS %2$s, UNNEST(demographic) AS _demographic, UNNEST(project) AS _project, " - + "UNNEST(diagnoses) AS _diagnoses WHERE (((_demographic.age_at_index >= 50) AND " - + "(_project.project_id = 'TCGA-OV')) AND (_diagnoses.figo_stage = 'Stage IIIC'))", - QUALIFIED_TABLE, TABLE); - - Query query = objectMapper.readValue(jsonQuery, Query.class); - String translatedQuery = QueryTranslator.sql(QUALIFIED_TABLE, query); - - assertEquals(EXPECTED_SQL, translatedQuery); - } - - @Test - public void testQueryNested() throws Exception { - String jsonQuery = Files.readString(TEST_FILES.resolve("query3.json")); - - String expectedSql = - String.format( - "SELECT %2$s.* FROM %1$s AS %2$s, UNNEST(A) AS _A, UNNEST(_A.B) AS _B, " - + "UNNEST(_B.C) AS _C, UNNEST(_C.D) AS _D WHERE (_D.column = value)", - QUALIFIED_TABLE, TABLE); - - Query query = objectMapper.readValue(jsonQuery, Query.class); - String translatedQuery = QueryTranslator.sql(QUALIFIED_TABLE, query); - - assertEquals(expectedSql, translatedQuery); - } - - @Test - public void testQueryFrom() throws Exception { - String jsonQuery = Files.readString(TEST_FILES.resolve("query-subquery.json")); - - String expectedSql = - String.format( - "SELECT 
%2$s.* FROM " - + "(SELECT %2$s.* FROM %1$s AS %2$s, UNNEST(ResearchSubject) AS _ResearchSubject, " - + "UNNEST(_ResearchSubject.identifier) AS _identifier " - + "WHERE (_identifier.system = 'PDC')) AS %2$s," - + " UNNEST(ResearchSubject) AS _ResearchSubject, " - + "UNNEST(_ResearchSubject.identifier) AS _identifier WHERE (_identifier.system = 'GDC')", - QUALIFIED_TABLE, TABLE); - - Query query = objectMapper.readValue(jsonQuery, Query.class); - String translatedQuery = QueryTranslator.sql(QUALIFIED_TABLE, query); - - assertEquals(expectedSql, translatedQuery); - } - - @Test - public void testQueryNot() throws Exception { - String jsonQuery = Files.readString(TEST_FILES.resolve("query-not.json")); - - String expectedSql = - String.format( - "SELECT %2$s.* FROM %1$s AS %2$s, UNNEST(A) AS _A WHERE (NOT (1 = _A.B))", - QUALIFIED_TABLE, TABLE); - - Query query = objectMapper.readValue(jsonQuery, Query.class); - String translatedQuery = QueryTranslator.sql(QUALIFIED_TABLE, query); - - assertEquals(expectedSql, translatedQuery); - } - - @Test - public void testQueryAmbiguous() throws Exception { - String jsonQuery = Files.readString(TEST_FILES.resolve("query-ambiguous.json")); - - String expectedSql = - String.format( - "SELECT %2$s.* FROM (SELECT %2$s.* FROM %1$s AS %2$s WHERE (%2$s.id = 'that')) AS %2$s WHERE (%2$s.id = 'this')", - QUALIFIED_TABLE, TABLE); - - Query query = objectMapper.readValue(jsonQuery, Query.class); - String translatedQuery = QueryTranslator.sql(QUALIFIED_TABLE, query); - - assertEquals(expectedSql, translatedQuery); - } -} diff --git a/src/test/resources/query/query-ambiguous.json b/src/test/resources/query/query-ambiguous.json index 094ae868..f9a071ad 100644 --- a/src/test/resources/query/query-ambiguous.json +++ b/src/test/resources/query/query-ambiguous.json @@ -4,7 +4,7 @@ "node_type": "=", "l": { "node_type": "column", - "value": "id" + "value": "A" }, "r": { "node_type": "quoted", @@ -15,7 +15,7 @@ "node_type": "=", "l": { "node_type": 
"column", - "value": "id" + "value": "A" }, "r": { "node_type": "quoted", diff --git a/src/test/resources/query/query-not.json b/src/test/resources/query/query-not.json index f16ae227..2dc30c2d 100644 --- a/src/test/resources/query/query-not.json +++ b/src/test/resources/query/query-not.json @@ -8,7 +8,7 @@ }, "r": { "node_type": "column", - "value": "A.B" + "value": "A1.ANUM" } } } diff --git a/src/test/resources/query/query1.json b/src/test/resources/query/query1.json index 7b405a79..513f5e45 100644 --- a/src/test/resources/query/query1.json +++ b/src/test/resources/query/query1.json @@ -2,10 +2,10 @@ "node_type": "=", "l": { "node_type": "column", - "value": "project_id" + "value": "A" }, "r": { "node_type": "quoted", - "value": "TCGA-OV" + "value": "value" } } \ No newline at end of file diff --git a/src/test/resources/query/query2.json b/src/test/resources/query/query2.json index 0436b52d..27e10366 100644 --- a/src/test/resources/query/query2.json +++ b/src/test/resources/query/query2.json @@ -6,7 +6,7 @@ "node_type": ">=", "l": { "node_type": "column", - "value": "demographic.age_at_index" + "value": "B.BA" }, "r": { "node_type": "unquoted", @@ -17,11 +17,11 @@ "node_type": "=", "l": { "node_type": "column", - "value": "project.project_id" + "value": "B.BB.BBB" }, "r": { "node_type": "quoted", - "value": "TCGA-OV" + "value": "value" } } }, @@ -29,11 +29,11 @@ "node_type": "=", "l": { "node_type": "column", - "value": "diagnoses.figo_stage" + "value": "A1.A1A" }, "r": { "node_type": "quoted", - "value": "Stage IIIC" + "value": "value" } } } \ No newline at end of file diff --git a/src/test/resources/query/query3.json b/src/test/resources/query/query3.json index af94b16e..0bb1a083 100644 --- a/src/test/resources/query/query3.json +++ b/src/test/resources/query/query3.json @@ -2,10 +2,10 @@ "node_type": "=", "l": { "node_type": "column", - "value": "A.B.C.D.column" + "value": "B.BB.BBD.BBDD.BBDDE" }, "r": { "node_type": "unquoted", - "value": "value" + "value": 
"50" } } \ No newline at end of file diff --git a/src/test/resources/schema/TABLE.json b/src/test/resources/schema/TABLE.json new file mode 100644 index 00000000..26857d12 --- /dev/null +++ b/src/test/resources/schema/TABLE.json @@ -0,0 +1,77 @@ +[ + { + "mode": "NULLABLE", + "name": "A", + "type": "STRING" + }, + { + "mode": "REPEATED", + "name": "A1", + "type": "RECORD", + "fields": [ + { + "mode": "NULLABLE", + "name": "A1A", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "ANUM", + "type": "INTEGER" + } + ] + }, + { + "mode": "REPEATED", + "name": "B", + "type": "RECORD", + "fields": [ + { + "mode": "NULLABLE", + "name": "BA", + "type": "INTEGER" + }, + { + "mode": "REPEATED", + "name": "BB", + "type": "RECORD", + "fields": [ + { + "mode": "NULLABLE", + "name": "BBA", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "BBB", + "type": "STRING" + }, + { + "mode": "REPEATED", + "name": "BBC", + "type": "STRING" + }, + { + "mode": "REPEATED", + "name": "BBD", + "type": "RECORD", + "fields": [ + { + "mode": "REPEATED", + "name": "BBDD", + "type": "RECORD", + "fields": [ + { + "mode": "NULLABLE", + "name": "BBDDE", + "type": "INTEGER" + } + ] + } + ] + } + ] + } + ] + } +] diff --git a/src/test/resources/schema/all_v3_0_subjects_meta.json b/src/test/resources/schema/all_v3_0_subjects_meta.json new file mode 100644 index 00000000..ad3deb09 --- /dev/null +++ b/src/test/resources/schema/all_v3_0_subjects_meta.json @@ -0,0 +1,315 @@ +[ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "species", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "sex", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "race", + "type": "STRING" + }, + { + "mode": 
"NULLABLE", + "name": "ethnicity", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_birth", + "type": "INTEGER" + }, + { + "mode": "REPEATED", + "name": "subject_associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "vital_status", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_death", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "cause_of_death", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "member_of_research_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_condition", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis_site", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "primary_diagnosis", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_diagnosis", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "morphology", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "stage", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "grade", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "method_of_diagnosis", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": 
"NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "treatment_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_outcome", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_start", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "days_to_treatment_end", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "therapeutic_agent", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_anatomic_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_effect", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "treatment_end_reason", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "number_of_cycles", + "type": "INTEGER" + } + ], + "mode": "REPEATED", + "name": "Treatment", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "Diagnosis", + "type": "RECORD" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "id", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "system", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "identifier", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "associated_project", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "age_at_collection", + "type": "INTEGER" + }, + { + "mode": "NULLABLE", + "name": "primary_disease_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "anatomical_site", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "source_material_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "specimen_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_specimen", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "derived_from_subject", + "type": "STRING" + } + ], + 
"mode": "REPEATED", + "name": "Specimen", + "type": "RECORD" + } + ], + "mode": "REPEATED", + "name": "ResearchSubject", + "type": "RECORD" + } +] From e37ab7e30793172e906182e5cb25d11e1874a432 Mon Sep 17 00:00:00 2001 From: Frederick Kaufman <84089414+fkaufman-asym@users.noreply.github.com> Date: Thu, 17 Mar 2022 16:00:53 -0400 Subject: [PATCH 12/18] update to local properties (#157) --- build.gradle | 2 +- src/main/resources/api/service_openapi.yaml | 6 +++--- src/main/resources/application-dev.properties | 3 +-- src/main/resources/application-local.properties | 8 ++++---- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/build.gradle b/build.gradle index 814bcd9f..491e8d54 100644 --- a/build.gradle +++ b/build.gradle @@ -150,7 +150,7 @@ jib { image = 'us.gcr.io/broad-dsp-gcr-public/base/jre:11-distroless' } container { - environment = [ 'spring.profiles.active':'dev' ] + environment = [ 'spring.profiles.active':'local' ] } } diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index 1bf300a1..1af043ef 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -278,7 +278,7 @@ components: required: true schema: type: string - default: all_v2_1 + default: all_v3_0_subjects_meta description: Dataset version SystemValue: in: query @@ -291,7 +291,7 @@ components: name: table name schema: type: string - default: broad-dsde-dev.cda_dev + default: gdc-bq-sample.dev description: Filter on system for results ResultOffset: in: query @@ -319,7 +319,7 @@ components: name: table schema: type: string - default: broad-dsde-dev.cda_dev + default: gdc-bq-sample.dev description: tablename schemas: diff --git a/src/main/resources/application-dev.properties b/src/main/resources/application-dev.properties index 6c7758ad..3d7ea20a 100644 --- a/src/main/resources/application-dev.properties +++ b/src/main/resources/application-dev.properties @@ -1,7 +1,6 @@ server.port=8080 
-cda.bqTable=broad-dsde-dev.cda_dev +cda.bqTable=gdc-bq-sample.dev cda.datasetVersion=all_v2_1 project=broad-dsde-dev bqTable=cda_dev - server.connection-timeout=300s \ No newline at end of file diff --git a/src/main/resources/application-local.properties b/src/main/resources/application-local.properties index d5071807..33d9c5e4 100644 --- a/src/main/resources/application-local.properties +++ b/src/main/resources/application-local.properties @@ -1,8 +1,8 @@ server.port=8080 -cda.bqTable=gdc-bq-sample.integration -cda.datasetVersion=gdc_pdc_idc_v2_1 +cda.bqTable=gdc-bq-sample.dev +cda.datasetVersion=all_v3_0_subjects_meta project=gdc-bq-sample -bqTable=integration -datasetVersion=gdc_pdc_idc_v2_1 +bqTable=dev +datasetVersion=all_v3_0_subjects_meta server.connection-timeout=300s From dda5256af3e8e18d272f93ea973e497b194da4c9 Mon Sep 17 00:00:00 2001 From: Dion Boles <82461519+dionboles-asym@users.noreply.github.com> Date: Thu, 17 Mar 2022 16:37:31 -0400 Subject: [PATCH 13/18] updated application props (#158) --- .../app/controller/QueryApiController.java | 6 ++--- .../terra/cda/app/service/QueryService.java | 26 ++++++++----------- src/main/resources/application-dev.properties | 4 +-- src/main/resources/application.properties | 11 ++++---- 4 files changed, 22 insertions(+), 25 deletions(-) diff --git a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java index 3a0834b9..3b83706d 100644 --- a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java +++ b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java @@ -87,9 +87,9 @@ public ResponseEntity jobStatus(String id) { private ResponseEntity sendQuery(String querySql, boolean dryRun) { var response = new QueryCreatedData().querySql(querySql); - if (!querySql.contains(applicationConfiguration.getProject())) { - throw new IllegalArgumentException("Your database is outside of the project"); - } + // if 
(!querySql.contains(applicationConfiguration.getProject())) { + // throw new IllegalArgumentException("Your database is outside of the project"); + // } var lowerCaseQuery = querySql.toLowerCase(); try { diff --git a/src/main/java/bio/terra/cda/app/service/QueryService.java b/src/main/java/bio/terra/cda/app/service/QueryService.java index 5260e606..0683cc0b 100644 --- a/src/main/java/bio/terra/cda/app/service/QueryService.java +++ b/src/main/java/bio/terra/cda/app/service/QueryService.java @@ -17,9 +17,7 @@ import com.google.cloud.bigquery.*; import com.google.common.annotations.VisibleForTesting; import java.text.SimpleDateFormat; -import java.time.Instant; import java.util.*; -import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -298,21 +296,19 @@ private void logQuery(Job queryJob, List jsonData) { public String startQuery(String query) { String jobID = UUID.randomUUID().toString(); - String destinationDataset = "Job_Queue"; - String destinationTable = String.format("Job_%s", jobID); - TableId tableId = TableId.of(destinationDataset, destinationTable); - TableDefinition tableDefinition = StandardTableDefinition.of(Schema.of()); - TableInfo tableInfo = - TableInfo.newBuilder(tableId, tableDefinition) - .setExpirationTime(Instant.now().toEpochMilli() + TimeUnit.MINUTES.toMillis(10)) - .build(); + // String destinationDataset = "Job_Queue"; + // String destinationTable = String.format("Job_%s", jobID); + // TableId tableId = TableId.of(destinationDataset, destinationTable); + // TableDefinition tableDefinition = StandardTableDefinition.of(Schema.of()); + // TableInfo tableInfo = + // TableInfo.newBuilder(tableId, tableDefinition) + // .setExpirationTime(Instant.now().toEpochMilli() + TimeUnit.MINUTES.toMillis(10)) + // .build(); QueryJobConfiguration.Builder queryConfig = - QueryJobConfiguration.newBuilder(query) - .setUseLegacySql(false) - 
.setUseQueryCache(true) - .setAllowLargeResults(true) - .setDestinationTable(tableInfo.getTableId()); + QueryJobConfiguration.newBuilder(query).setUseLegacySql(false).setUseQueryCache(true); + // .setAllowLargeResults(true); + // .setDestinationTable(tableInfo.getTableId()); // Create a job ID so that we can safely retry. JobId jobId = JobId.of(jobID); /** diff --git a/src/main/resources/application-dev.properties b/src/main/resources/application-dev.properties index 3d7ea20a..250ad5d3 100644 --- a/src/main/resources/application-dev.properties +++ b/src/main/resources/application-dev.properties @@ -1,6 +1,6 @@ server.port=8080 cda.bqTable=gdc-bq-sample.dev -cda.datasetVersion=all_v2_1 -project=broad-dsde-dev +cda.datasetVersion=all_v3_0_subjects_meta +project=gdc-bq-sample.dev bqTable=cda_dev server.connection-timeout=300s \ No newline at end of file diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 55d3aece..13467210 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,7 +1,8 @@ server.port=8080 -cda.bqTable=broad-dsde-dev.cda_dev -cda.datasetVersion=all_v2_1 -project=broad-dsde-dev -bqTable=cda_dev +cda.bqTable=gdc-bq-sample.dev +cda.datasetVersion=all_v3_0_subjects_meta +project=gdc-bq-sample +bqTable=dev +datasetVersion=all_v3_0_subjects_meta -server.connection-timeout=300s +server.connection-timeout=300s \ No newline at end of file From bce10f4c0eb133c6087e6bf1cb02ecd0ecf8caa1 Mon Sep 17 00:00:00 2001 From: Frederick Kaufman Date: Thu, 24 Mar 2022 14:30:32 -0400 Subject: [PATCH 14/18] flatten json output to csv --- .gitignore | 1 + build.gradle | 2 + jsonOutput-8.csv | 29 ++ .../app/controller/QueryApiController.java | 11 +- .../terra/cda/app/flatten/JsonFlattener.java | 471 ++++++++++++++++++ .../bio/terra/cda/app/flatten/OrderJson.java | 71 +++ .../bio/terra/cda/app/flatten/model/Row.java | 21 + .../terra/cda/app/service/QueryService.java | 39 +- 
src/main/resources/api/service_openapi.yaml | 33 +- .../terra/cda/app/flatten/FlattenTest.java | 101 ++++ .../terra/cda/app/flatten/model/RowTest.java | 17 + src/test/resources/csv_files/jsonOutput-7.csv | 29 ++ .../resources/json_files/gdc_no_files1.json | 1 + 13 files changed, 813 insertions(+), 13 deletions(-) create mode 100644 jsonOutput-8.csv create mode 100644 src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java create mode 100644 src/main/java/bio/terra/cda/app/flatten/OrderJson.java create mode 100644 src/main/java/bio/terra/cda/app/flatten/model/Row.java create mode 100644 src/test/java/bio/terra/cda/app/flatten/FlattenTest.java create mode 100644 src/test/java/bio/terra/cda/app/flatten/model/RowTest.java create mode 100644 src/test/resources/csv_files/jsonOutput-7.csv create mode 100644 src/test/resources/json_files/gdc_no_files1.json diff --git a/.gitignore b/.gitignore index ecf552d9..572cbef8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ local-dev/ !gradle/wrapper/gradle-wrapper.jar !**/src/main/** !**/src/test/** +*.csv ### STS ### .apt_generated diff --git a/build.gradle b/build.gradle index 491e8d54..c3e07ae6 100644 --- a/build.gradle +++ b/build.gradle @@ -51,6 +51,8 @@ dependencies { implementation group: 'org.webjars', name: 'swagger-ui', version: '3.24.0' implementation group: 'org.aspectj', name: 'aspectjweaver', version: '1.8.8' implementation group: 'com.google.cloud', name: 'google-cloud-bigquery', version: '1.124.7' + implementation group: 'com.google.code.gson', name: 'gson', version: '2.8.5' + implementation group: 'com.jayway.jsonpath', name: 'json-path', version: '2.4.0' // These dependencies are required to load the logback config file. 
implementation group: 'org.codehaus.groovy', name: 'groovy', version: '3.0.7' diff --git a/jsonOutput-8.csv b/jsonOutput-8.csv new file mode 100644 index 00000000..3c1cef3a --- /dev/null +++ b/jsonOutput-8.csv @@ -0,0 +1,29 @@ +id|species|sex|race|ethnicity|days_to_birth|vital_status|age_at_death|cause_of_death|id_1|member_of_research_project|primary_diagnosis_condition|primary_diagnosis_site|id_2|primary_diagnosis|age_at_diagnosis|morphology|stage|grade|method_of_diagnosis|identifier.system|identifier.value|subject_associated_project.0|ResearchSubject.id|ResearchSubject.member_of_research_project|ResearchSubject.primary_diagnosis_condition|ResearchSubject.primary_diagnosis_site|ResearchSubject.identifier.system|ResearchSubject.identifier.value|ResearchSubject.Diagnosis.id|ResearchSubject.Diagnosis.primary_diagnosis|ResearchSubject.Diagnosis.age_at_diagnosis|ResearchSubject.Diagnosis.morphology|ResearchSubject.Diagnosis.stage|ResearchSubject.Diagnosis.grade|ResearchSubject.Diagnosis.method_of_diagnosis|ResearchSubject.Diagnosis.identifier.system|ResearchSubject.Diagnosis.identifier.value|ResearchSubject.Diagnosis.Treatment.id|ResearchSubject.Diagnosis.Treatment.treatment_type|ResearchSubject.Diagnosis.Treatment.treatment_outcome|ResearchSubject.Diagnosis.Treatment.days_to_treatment_start|ResearchSubject.Diagnosis.Treatment.days_to_treatment_end|ResearchSubject.Diagnosis.Treatment.therapeutic_agent|ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site|ResearchSubject.Diagnosis.Treatment.treatment_effect|ResearchSubject.Diagnosis.Treatment.treatment_end_reason|ResearchSubject.Diagnosis.Treatment.number_of_cycles|ResearchSubject.Diagnosis.Treatment.identifier.system|ResearchSubject.Diagnosis.Treatment.identifier.value|ResearchSubject.Specimen.id|ResearchSubject.Specimen.associated_project|ResearchSubject.Specimen.age_at_collection|ResearchSubject.Specimen.primary_disease_type|ResearchSubject.Specimen.anatomical_site|ResearchSubject.Specimen.source_material_type|R
esearchSubject.Specimen.specimen_type|ResearchSubject.Specimen.derived_from_specimen|ResearchSubject.Specimen.derived_from_subject|ResearchSubject.Specimen.identifier.system|ResearchSubject.Specimen.identifier.value|identifier_1.system|identifier_1.value|Diagnosis.id|Diagnosis.primary_diagnosis|Diagnosis.age_at_diagnosis|Diagnosis.morphology|Diagnosis.stage|Diagnosis.grade|Diagnosis.method_of_diagnosis|Diagnosis.identifier.system|Diagnosis.identifier.value|Diagnosis.Treatment.id|Diagnosis.Treatment.treatment_type|Diagnosis.Treatment.treatment_outcome|Diagnosis.Treatment.days_to_treatment_start|Diagnosis.Treatment.days_to_treatment_end|Diagnosis.Treatment.therapeutic_agent|Diagnosis.Treatment.treatment_anatomic_site|Diagnosis.Treatment.treatment_effect|Diagnosis.Treatment.treatment_end_reason|Diagnosis.Treatment.number_of_cycles|Diagnosis.Treatment.identifier.system|Diagnosis.Treatment.identifier.value|Specimen.id|Specimen.associated_project|Specimen.age_at_collection|Specimen.primary_disease_type|Specimen.anatomical_site|Specimen.source_material_type|Specimen.specimen_type|Specimen.derived_from_specimen|Specimen.derived_from_subject|Specimen.identifier.system|Specimen.identifier.value|identifier_2.system|identifier_2.value|Treatment.id|Treatment.treatment_type|Treatment.treatment_outcome|Treatment.days_to_treatment_start|Treatment.days_to_treatment_end|Treatment.therapeutic_agent|Treatment.treatment_anatomic_site|Treatment.treatment_effect|Treatment.treatment_end_reason|Treatment.number_of_cycles|Treatment.identifier.system|Treatment.identifier.value +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||"GDC"|"TCGA-29-2435"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo 
sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"GDC"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|||"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||"GDC"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|||"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c"|"Pharmaceutical Therapy, NOS"|||||||||"GDC"|"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous 
Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|||"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"68aece6e-2de6-506a-8190-7433a1f5b8eb"|"Radiation Therapy, NOS"|||||||||"GDC"|"68aece6e-2de6-506a-8190-7433a1f5b8eb"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"||||||||||||||||||||||||"a76ac995-782d-4ba3-a029-40f8d7d9642b"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Blood Derived Normal"|"sample"|"initial specimen"|"TCGA-29-2435"|"GDC"|"a76ac995-782d-4ba3-a029-40f8d7d9642b"|||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"||||||||||||||||||||||||"c34bf82c-7b38-482a-bfe3-f2750f1a24bc"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Blood Derived Normal"|"portion"|"a76ac995-782d-4ba3-a029-40f8d7d9642b"|"TCGA-29-2435"|"GDC"|"c34bf82c-7b38-482a-bfe3-f2750f1a24bc"|||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not 
reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"||||||||||||||||||||||||"69a797dd-1083-4a83-a184-ec19f704d770"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"sample"|"initial specimen"|"TCGA-29-2435"|"GDC"|"69a797dd-1083-4a83-a184-ec19f704d770"|||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"||||||||||||||||||||||||"84e1540d-c756-414e-92c9-3e0d5f45cd48"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"portion"|"69a797dd-1083-4a83-a184-ec19f704d770"|"TCGA-29-2435"|"GDC"|"84e1540d-c756-414e-92c9-3e0d5f45cd48"|||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"||||||||||||||||||||||||"49cf0c2e-45b3-4a61-97ad-8cae610313d0"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"portion"|"69a797dd-1083-4a83-a184-ec19f704d770"|"TCGA-29-2435"|"GDC"|"49cf0c2e-45b3-4a61-97ad-8cae610313d0"|||||||||||||||||||||||||||||||||||||||||||||||| 
+"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"||||||||||||||||||||||||"169f0540-bdac-416d-b239-74b4804b140b"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"portion"|"69a797dd-1083-4a83-a184-ec19f704d770"|"TCGA-29-2435"|"GDC"|"169f0540-bdac-416d-b239-74b4804b140b"|||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"||||||||||||||||||||||||"3302664d-2406-4641-b54f-daf7400522b8"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"slide"|"169f0540-bdac-416d-b239-74b4804b140b"|"TCGA-29-2435"|"GDC"|"3302664d-2406-4641-b54f-daf7400522b8"|||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"||||||||||||||||||||||||"b317672c-b196-428f-9c3f-077fdd24e96e"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary 
Tumor"|"slide"|"169f0540-bdac-416d-b239-74b4804b140b"|"TCGA-29-2435"|"GDC"|"b317672c-b196-428f-9c3f-077fdd24e96e"|||||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|||||||||||||||||||||||||||||||||||||||"GDC"|"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|||||||||||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|||||||||||||||||||||||||||||||||||||||||"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||"GDC"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"||||||||||||||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|||||||||||||||||||||||||||||||||||||||||"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c"|"Pharmaceutical Therapy, NOS"|||||||||"GDC"|"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c"||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not 
reported"||||"TCGA-OV"|||||||||||||||||||||||||||||||||||||||||"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"68aece6e-2de6-506a-8190-7433a1f5b8eb"|"Radiation Therapy, NOS"|||||||||"GDC"|"68aece6e-2de6-506a-8190-7433a1f5b8eb"||||||||||||||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"a76ac995-782d-4ba3-a029-40f8d7d9642b"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Blood Derived Normal"|"sample"|"initial specimen"|"TCGA-29-2435"|"GDC"|"a76ac995-782d-4ba3-a029-40f8d7d9642b"|||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"c34bf82c-7b38-482a-bfe3-f2750f1a24bc"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Blood Derived Normal"|"portion"|"a76ac995-782d-4ba3-a029-40f8d7d9642b"|"TCGA-29-2435"|"GDC"|"c34bf82c-7b38-482a-bfe3-f2750f1a24bc"|||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"69a797dd-1083-4a83-a184-ec19f704d770"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"sample"|"initial 
specimen"|"TCGA-29-2435"|"GDC"|"69a797dd-1083-4a83-a184-ec19f704d770"|||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"84e1540d-c756-414e-92c9-3e0d5f45cd48"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"portion"|"69a797dd-1083-4a83-a184-ec19f704d770"|"TCGA-29-2435"|"GDC"|"84e1540d-c756-414e-92c9-3e0d5f45cd48"|||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"49cf0c2e-45b3-4a61-97ad-8cae610313d0"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"portion"|"69a797dd-1083-4a83-a184-ec19f704d770"|"TCGA-29-2435"|"GDC"|"49cf0c2e-45b3-4a61-97ad-8cae610313d0"|||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"169f0540-bdac-416d-b239-74b4804b140b"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"portion"|"69a797dd-1083-4a83-a184-ec19f704d770"|"TCGA-29-2435"|"GDC"|"169f0540-bdac-416d-b239-74b4804b140b"|||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not 
reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"3302664d-2406-4641-b54f-daf7400522b8"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"slide"|"169f0540-bdac-416d-b239-74b4804b140b"|"TCGA-29-2435"|"GDC"|"3302664d-2406-4641-b54f-daf7400522b8"|||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"b317672c-b196-428f-9c3f-077fdd24e96e"|"TCGA-OV"|"-28779"|"Cystic, Mucinous and Serous Neoplasms"||"Primary Tumor"|"slide"|"169f0540-bdac-416d-b239-74b4804b140b"|"TCGA-29-2435"|"GDC"|"b317672c-b196-428f-9c3f-077fdd24e96e"|||||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"GDC"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|||||||||||| +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c"|"Pharmaceutical Therapy, 
NOS"|||||||||"GDC"|"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c" +"TCGA-29-2435"|"Homo sapiens"|"female"|"white"|"not reported"|"-28779"|"Alive"|||"8fa35df3-f544-4c47-bdd1-e4d6fc6662be"|"TCGA-OV"|"Cystic, Mucinous and Serous Neoplasms"|"Ovary"|"dc8af98b-03cb-5817-84fa-d86a7f2df8c6"|"Serous cystadenocarcinoma, NOS"|"28779"|"8441/3"||"not reported"||||"TCGA-OV"|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"68aece6e-2de6-506a-8190-7433a1f5b8eb"|"Radiation Therapy, NOS"|||||||||"GDC"|"68aece6e-2de6-506a-8190-7433a1f5b8eb" diff --git a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java index 3b83706d..a38ffa1e 100644 --- a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java +++ b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java @@ -18,7 +18,6 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; -import java.util.Collections; import java.util.Set; import javax.servlet.http.HttpServletRequest; import javax.validation.Valid; @@ -62,11 +61,13 @@ private String createNextUrl(String jobId, int offset, int limit) { @TrackExecutionTime @Override - public ResponseEntity query(String id, Integer offset, Integer limit) { - var result = queryService.getQueryResults(id, offset, limit); - var response = + public ResponseEntity query( + String id, Integer offset, Integer limit, String format, String includeHeaders) { + QueryService.QueryResult result = + queryService.getQueryResults(id, offset, limit, format, includeHeaders); + QueryResponseData response = new QueryResponseData() - .result(Collections.unmodifiableList(result.items)) + .result(result.items) .totalRowCount(result.totalRowCount) .querySql(result.querySql); int nextPage = result.items.size() + limit; diff --git a/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java b/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java new file mode 
package bio.terra.cda.app.flatten;

import bio.terra.cda.app.flatten.model.Row;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
import com.jayway.jsonpath.spi.json.JacksonJsonProvider;
import com.jayway.jsonpath.spi.json.JsonProvider;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import com.jayway.jsonpath.spi.mapper.MappingProvider;
import java.io.*;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts a JSON document into a 2D, spreadsheet-like matrix that can be rendered as CSV. This
 * class is strongly borrowed from https://github.com/opendevl/Json2Flat but modified to handle a
 * result object of type List, i.e. multiple rows coming out of BigQuery.
 *
 * <p>The field separator ("\t", ",", "|", ...) is chosen when rendering, via {@link
 * #write2csv(char)}.
 *
 * <p>Instances keep the state of the last {@link #json2Sheet(String, String)} call in fields, so a
 * single instance must not be shared between concurrent conversions.
 */
public class JsonFlattener {

  /** Matches a single array index (for example {@code [3]}) at the very end of a path. */
  private static final String TRAILING_INDEX_REGEX = "(\\[[0-9]*\\]$)";

  /** Matches an array index anywhere in a path. */
  private static final String ANY_INDEX_REGEX = "(\\[[0-9]*\\])";

  private static final Pattern TRAILING_INDEX =
      Pattern.compile(TRAILING_INDEX_REGEX, Pattern.MULTILINE);

  private String jsonString = null;

  /** One {@code Object[]} per output row; row 0 is the header when headers were requested. */
  private List<Object[]> sheetMatrix = null;

  /** Every JsonPath found in the document, as reported by the {@code $..*} query. */
  private List<String> pathList = null;

  /** Column names (normalized leaf paths) in first-seen order; the index is the column index. */
  private List<String> unique = null;

  private JsonElement element = null;

  private final OrderJson makeOrder = new OrderJson();

  public JsonFlattener() {}

  /**
   * Parses the JSON document, derives the column set from its leaf paths and fills the sheet
   * matrix by calling {@code make2D}.
   *
   * @param jsonString the JSON document to flatten
   * @param includeHeaders the literal string {@code "true"} to emit the column names as the first
   *     row; any other value (including {@code null}) suppresses the header row, which is useful
   *     when several results are concatenated downstream
   * @return this object, so calls can be chained
   */
  public JsonFlattener json2Sheet(String jsonString, String includeHeaders) {
    this.jsonString = jsonString;

    // NOTE(review): setDefaults is a static call, so this replaces the JsonPath defaults for the
    // whole process on every invocation, not just for this instance.
    Configuration.setDefaults(
        new Configuration.Defaults() {
          private final JsonProvider jsonProvider = new JacksonJsonProvider();
          private final MappingProvider mappingProvider = new JacksonMappingProvider();

          @Override
          public JsonProvider jsonProvider() {
            return jsonProvider;
          }

          @Override
          public MappingProvider mappingProvider() {
            return mappingProvider;
          }

          @Override
          public Set<Option> options() {
            return EnumSet.noneOf(Option.class);
          }
        });

    Configuration conf =
        Configuration.defaultConfiguration()
            .addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL)
            .addOptions(Option.SUPPRESS_EXCEPTIONS);

    Configuration pathConf =
        Configuration.defaultConfiguration()
            .addOptions(Option.AS_PATH_LIST)
            .addOptions(Option.ALWAYS_RETURN_LIST);

    sheetMatrix = new ArrayList<>();

    element = new JsonParser().parse(this.jsonString);

    pathList = JsonPath.using(pathConf).parse(this.jsonString).read("$..*");

    DocumentContext parsed = JsonPath.using(conf).parse(this.jsonString);

    // Keep only the paths that point at a leaf value (or at null); LinkedHashSet keeps the
    // document order and removes duplicates once array indexes are stripped.
    Set<String> columnNames = new LinkedHashSet<>();
    for (String path : pathList) {
      Object value = parsed.read(path);
      if (isLeafValue(value)) {
        columnNames.add(normalizePath(path));
      }
    }

    unique = new ArrayList<>(columnNames);

    // The header row is optional so that multiple results can be aggregated downstream.
    if ("true".equals(includeHeaders)) {
      sheetMatrix.add(unique.toArray());
    }

    // adding all the content of csv
    sheetMatrix.add(make2D(new Object[unique.size()], element, "$"));

    dropRedundantLastRow();

    return this;
  }

  /** Returns true for {@code null} and for the value types that become a cell of their own. */
  private static boolean isLeafValue(Object value) {
    return value == null
        || value instanceof Boolean
        || value instanceof Integer
        || value instanceof String
        || value instanceof Double
        || value instanceof Long;
  }

  /**
   * Turns a bracket-notation path such as {@code $['a'][0]['b']} into a dotted column name such as
   * {@code a.b}. Array indexes are dropped, except for one that ends the path (an array of
   * primitives), which is kept so that each element gets its own column.
   */
  private static String normalizePath(String rawPath) {
    Matcher trailingIndex = TRAILING_INDEX.matcher(rawPath);
    String stripped;
    if (trailingIndex.find()) {
      String[] parts = rawPath.replace("$", "").split(TRAILING_INDEX_REGEX);
      // split() returns an empty array when the path is nothing but the trailing index ("$[0]").
      String head = parts.length == 0 ? "" : parts[0].replaceAll(ANY_INDEX_REGEX, "");
      stripped = (head + trailingIndex.group()).replace("'][", ".");
    } else {
      stripped = rawPath.replace("$", "").replaceAll(ANY_INDEX_REGEX, "");
    }
    return stripped.replace("[", "").replace("]", "").replace("''", ".").replace("'", "");
  }

  /**
   * Removes the last row when it carries no information: either every cell is null, or every
   * non-null cell repeats the value of the row before it. With a single row there is nothing to
   * compare against, so only the all-null check applies.
   */
  private void dropRedundantLastRow() {
    int lastIndex = sheetMatrix.size() - 1;
    Object[] last = sheetMatrix.get(lastIndex);

    boolean delete = true;
    for (Object cell : last) {
      if (cell != null) {
        delete = false;
        break;
      }
    }

    if (!delete && lastIndex >= 1) {
      Object[] secondLast = sheetMatrix.get(lastIndex - 1);
      delete = true;
      for (int i = 0; i < last.length; i++) {
        if (last[i] != null && !last[i].equals(secondLast[i])) {
          delete = false;
          break;
        }
      }
    }

    if (delete) {
      sheetMatrix.remove(lastIndex);
    }
  }

  /**
   * Transforms the JSON element into its 2D representation. Objects inside arrays are appended to
   * the sheet matrix as they are completed; the row for the outermost element is returned to the
   * caller.
   *
   * @param old the row inherited from the parent element; it is copied, never modified
   * @param element the part of the JSON document being flattened
   * @param path the bracket-notation path of {@code element}
   * @return the row built for {@code element}
   */
  private Object[] make2D(Object[] old, JsonElement element, String path) {

    Object[] current = old.clone();

    if (element.isJsonObject()) {

      // Reorder the members so that plain values are written before arrays are expanded.
      element = makeOrder.orderJson(element);

      for (Map.Entry<String, JsonElement> entry : element.getAsJsonObject().entrySet()) {
        JsonElement value = entry.getValue();
        String childPath = path + "['" + entry.getKey() + "']";

        if (value.isJsonPrimitive()) {
          int index = unique.indexOf(normalizePath(childPath));
          if (index >= 0) {
            current[index] = value.getAsJsonPrimitive();
          }
        } else if (value.isJsonObject() || value.isJsonArray()) {
          current = make2D(current, value, childPath);
        }
      }

    } else if (element.isJsonArray()) {
      int arrIndex = 0;

      for (JsonElement item : element.getAsJsonArray()) {

        if (item.isJsonPrimitive()) {
          // The index is quoted here, so it survives normalization as its own column ("a.0").
          int index = unique.indexOf(normalizePath(path + "['" + arrIndex + "']"));
          if (index >= 0) {
            current[index] = item.getAsJsonPrimitive();
          }
        } else if (item.isJsonObject()) {
          boolean gotArray = isInnerArray(item);

          sheetMatrix.add(make2D(current, item.getAsJsonObject(), path + "[" + arrIndex + "]"));
          // When the object holds an array of objects, the recursive call has already appended
          // the expanded rows, so the summary row added above is taken back out.
          if (gotArray) {
            sheetMatrix.remove(sheetMatrix.size() - 1);
          }
        } else if (item.isJsonArray()) {
          make2D(current, item.getAsJsonArray(), path + "[" + arrIndex + "]");
        }
        arrIndex++;
      }
    }
    return current;
  }

  /**
   * Checks whether an object inside an array itself contains an array of objects.
   *
   * @param element a JSON object found inside an array
   * @return true if any member of {@code element} is an array with at least one object in it
   */
  private boolean isInnerArray(JsonElement element) {
    for (Map.Entry<String, JsonElement> entry : element.getAsJsonObject().entrySet()) {
      if (entry.getValue().isJsonArray()) {
        for (JsonElement item : entry.getValue().getAsJsonArray()) {
          if (item.isJsonObject()) {
            return true;
          }
        }
      }
    }
    return false;
  }

  /**
   * Replaces the default header separator "." with a custom separator in every cell of row 0.
   *
   * <p>NOTE(review): row 0 is only the header row when {@code json2Sheet} was called with
   * includeHeaders "true"; otherwise this rewrites the first data row.
   *
   * @param separator the literal text that replaces each "." in the column names
   * @return this object, so calls can be chained
   * @throws Exception if no document has been transformed yet
   */
  public JsonFlattener headerSeparator(String separator) throws Exception {
    if (separator == null) {
      throw new IllegalArgumentException("separator must not be null");
    }
    if (this.sheetMatrix == null || this.sheetMatrix.isEmpty()) {
      throw new IllegalStateException(
          "The JSON document hasn't been transformed yet. Try using json2Sheet() before using headerSeparator");
    }

    Object[] header = this.sheetMatrix.get(0);
    for (int i = 0; i < header.length; i++) {
      if (header[i] == null) {
        continue;
      }
      // replace() is literal; replaceAll(".", ...) would treat "." as "any character".
      header[i] = header[i].toString().replaceFirst("^\\/", "").replace(".", separator).trim();
    }
    return this;
  }

  /**
   * Returns the sheet matrix.
   *
   * @return the live list of rows; null before the first {@code json2Sheet} call
   */
  public List<Object[]> getJsonAsSheet() {
    return this.sheetMatrix;
  }

  /**
   * Returns the spreadsheet as a list of strings, one per row, each produced by {@link
   * Row#toSpreadsheetRow(Object[])}.
   *
   * @return a new list with one rendered string per row of the sheet matrix
   */
  public List<String> getJsonAsSpreadsheet() {
    List<String> spreadsheet = new ArrayList<>();
    for (Object[] sheetRow : this.sheetMatrix) {
      spreadsheet.add(Row.toSpreadsheetRow(sheetRow));
    }
    return spreadsheet;
  }

  /**
   * Returns the unique fields (column names) of the JSON document.
   *
   * @return the column names in column order; null before the first {@code json2Sheet} call
   */
  public List<String> getUniqueFields() {
    return this.unique;
  }

  /**
   * Writes the spreadsheet representation to a file with '|' as the delimiter.
   *
   * @param destination the destination path of the output file
   * @throws FileNotFoundException if the file cannot be created or opened for writing
   * @throws UnsupportedEncodingException if UTF-8 is not supported
   */
  public void write2csv(String destination)
      throws FileNotFoundException, UnsupportedEncodingException {
    this.write2csv(destination, '|');
  }

  /**
   * Writes the spreadsheet representation to a file with a custom delimiter.
   *
   * @param destination the destination path of the output file
   * @param delimiter the delimiter placed between cells
   * @throws FileNotFoundException if the file cannot be created or opened for writing
   * @throws UnsupportedEncodingException if UTF-8 is not supported
   */
  public void write2csv(String destination, char delimiter)
      throws FileNotFoundException, UnsupportedEncodingException {
    // try-with-resources closes the file even if rendering or writing fails.
    try (PrintWriter writer = new PrintWriter(new File(destination), "UTF-8")) {
      writer.write(write2csv(delimiter));
    }
  }

  /**
   * Returns the spreadsheet representation as a string, one line per row, with a custom delimiter
   * between cells. Null cells are rendered as empty fields.
   *
   * @param delimiter the delimiter placed between cells
   * @return the rendered rows, each terminated by a newline
   */
  public String write2csv(char delimiter) {
    StringBuilder buffer = new StringBuilder();
    for (Object[] row : this.sheetMatrix) {
      for (int i = 0; i < row.length; i++) {
        if (i > 0) {
          buffer.append(delimiter);
        }
        if (row[i] != null) {
          buffer.append(row[i].toString());
        }
      }
      buffer.append("\n");
    }
    return buffer.toString();
  }
}
package bio.terra.cda.app.flatten;

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.reflect.TypeToken;
import java.lang.reflect.Type;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Reorders the members of a JSON object so that array-valued (and null) members come after all
 * other members, keeping the original relative order inside each group.
 *
 * <p>The working maps are kept in fields and overwritten on every call, so an instance must not be
 * used from several threads at once.
 */
public class OrderJson {

  /** Target type for Gson: a JSON object read as a map from member name to value. */
  Type type = new TypeToken<Map<String, Object>>() {}.getType();

  Map<String, Object> origMap = null;

  Map<String, Object> jsonPrimitive = null;
  Map<String, Object> jsonArray = null;
  // Reserved for a separate "JSON object" group; orderJson() never puts anything into it.
  Map<String, Object> jsonObject = null;

  Gson gson = null;

  public OrderJson() {
    gson = new Gson();
  }

  /**
   * Returns a copy of {@code element} whose members are ordered as: every non-array member
   * (primitives and nested objects alike) first, then members whose value is an array or null.
   *
   * <p>NOTE(review): the value goes through a Gson {@code Map<String, Object>} round trip;
   * presumably numbers come back as doubles and null members are dropped by the default Gson
   * configuration. Confirm against the Gson version in use.
   *
   * @param element a JSON object
   * @return the reordered JSON tree
   */
  public JsonElement orderJson(JsonElement element) {

    // LinkedHashMap keeps insertion order, which is what defines the output order.
    jsonPrimitive = new LinkedHashMap<>();
    jsonArray = new LinkedHashMap<>();
    jsonObject = new LinkedHashMap<>();

    // converting JsonElement to Map
    origMap = gson.fromJson(element, type);

    for (Map.Entry<String, Object> entry : origMap.entrySet()) {
      Object value = entry.getValue();
      if (value == null || value instanceof java.util.ArrayList) {
        // arrays (and nulls) are deferred to the end
        jsonArray.put(entry.getKey(), value);
      } else {
        // everything else, nested objects included, keeps its place in the first group
        jsonPrimitive.put(entry.getKey(), value);
      }
    }

    // Appending the deferred members after the others yields the final order.
    jsonPrimitive.putAll(jsonArray);

    // reconstructing the JSON from the ordered map
    return gson.toJsonTree(jsonPrimitive, LinkedHashMap.class);
  }
}
jsonPrimitive.put(entry.getKey(), entry.getValue()); + } + } catch (Exception ex) { + ex.printStackTrace(); + } + } + + /* Keeping Order - + * 1) JSON primitive + * 2) JSON Array + * 3) JSON Object ( order of JSON Object is yet to be decided) + * */ + + // appending jsonArray map to jsonPrimitive map in order to maintain order. + jsonPrimitive.putAll(jsonArray); + + // reconstructing the JSON from Map Objects and returning + return gson.toJsonTree(jsonPrimitive, LinkedHashMap.class); + } +} diff --git a/src/main/java/bio/terra/cda/app/flatten/model/Row.java b/src/main/java/bio/terra/cda/app/flatten/model/Row.java new file mode 100644 index 00000000..79c03147 --- /dev/null +++ b/src/main/java/bio/terra/cda/app/flatten/model/Row.java @@ -0,0 +1,21 @@ +package bio.terra.cda.app.flatten.model; + +public class Row { + + public static String toSpreadsheetRow(Object[] cellData) { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < cellData.length - 1; i++) { + if (cellData[i] != null) { + cellData[i].toString().replaceAll(",", "\\,"); + builder.append(cellData[i].toString() + "\t"); + } else { + builder.append("\t"); + } + } + if (cellData[cellData.length - 1] != null) { + builder.append(cellData[cellData.length - 1]); + } + + return builder.toString(); + } +} diff --git a/src/main/java/bio/terra/cda/app/service/QueryService.java b/src/main/java/bio/terra/cda/app/service/QueryService.java index 0683cc0b..a3b39f7f 100644 --- a/src/main/java/bio/terra/cda/app/service/QueryService.java +++ b/src/main/java/bio/terra/cda/app/service/QueryService.java @@ -3,6 +3,7 @@ import static java.lang.Thread.currentThread; import bio.terra.cda.app.configuration.ApplicationConfiguration; +import bio.terra.cda.app.flatten.JsonFlattener; import bio.terra.cda.app.service.exception.BadQueryException; import bio.terra.cda.generated.model.JobStatusData; import bio.terra.cda.generated.model.SystemStatus; @@ -130,7 +131,8 @@ protected JsonNode valueToJson(FieldValue value, Field
field) { } } - public QueryResult getQueryResults(String queryId, int offset, int pageSize) { + public QueryResult getQueryResults( + String queryId, int offset, int pageSize, String format, String includeHeaders) { final Job job = bigQuery.getJob(queryId); if (job == null || !job.exists()) { throw new RuntimeException("Unknown query " + queryId); @@ -139,7 +141,7 @@ public QueryResult getQueryResults(String queryId, int offset, int pageSize) { // If the Query is still running, return an empty result. return new QueryResult(Collections.emptyList(), null, getSqlFromJob(job)); } - return getJobResults(job, offset, pageSize); + return getJobResults(job, offset, pageSize, format, includeHeaders); } public void setBigQuery(BigQuery bigQuery) { @@ -151,14 +153,18 @@ public static class QueryResult { public final Long totalRowCount; public final String querySql; - QueryResult(List items, Long totalRowCount, String querySql) { + QueryResult(List items, Long totalRowCount, String querySql) { this.items = new ArrayList<>(items); this.totalRowCount = totalRowCount; this.querySql = querySql; } } - private QueryResult getJobResults(Job queryJob, int offset, int pageSize) { + private QueryResult getJobResults( + Job queryJob, int offset, int pageSize, String format, String includeHeaders) { + if (!format.equals("JSON")) { + pageSize = 10; + } var options = new ArrayList(); if (offset < 0) { throw new IllegalArgumentException("Invalid offset: " + offset); @@ -192,10 +198,33 @@ private QueryResult getJobResults(Job queryJob, int offset, int pageSize) { break; } } + // If we are returning CSV as a spreadsheet, pageSize is constrained to 10 json rows for + // now. 
+ if (!format.equals("JSON")) { + List spreadsheet = new ArrayList<>(); + JsonFlattener jflat = new JsonFlattener(); + int bqResults = 0; + for (JsonNode jsonRow : jsonData) { + if (bqResults > 0) { + includeHeaders = "false"; + } + List csvRows = + jflat.json2Sheet(jsonRow.toString(), includeHeaders).getJsonAsSpreadsheet(); + spreadsheet.addAll(csvRows); + bqResults++; + } + + logQuery(queryJob, jsonData); + + return new QueryResult(spreadsheet, result.getTotalRows(), getSqlFromJob(queryJob)); + } logQuery(queryJob, jsonData); - return new QueryResult(jsonData, result.getTotalRows(), getSqlFromJob(queryJob)); + List jsonRowData = new ArrayList<>(); + jsonRowData.add(jsonData); + return new QueryResult(jsonRowData, result.getTotalRows(), getSqlFromJob(queryJob)); + } catch (InterruptedException e) { currentThread().interrupt(); throw new BadQueryException("Error while getting query results", e); diff --git a/src/main/resources/api/service_openapi.yaml b/src/main/resources/api/service_openapi.yaml index 1af043ef..f8edbc9b 100644 --- a/src/main/resources/api/service_openapi.yaml +++ b/src/main/resources/api/service_openapi.yaml @@ -144,9 +144,11 @@ paths: - query parameters: - - $ref: "#/components/parameters/QueryId" - - $ref: "#/components/parameters/ResultOffset" - - $ref: "#/components/parameters/ResultLimit" + - $ref: '#/components/parameters/QueryId' + - $ref: '#/components/parameters/ResultOffset' + - $ref: '#/components/parameters/ResultLimit' + - $ref: '#/components/parameters/Format' + - $ref: '#/components/parameters/Headers' responses: 200: @@ -321,6 +323,22 @@ components: type: string default: gdc-bq-sample.dev description: tablename + Format: + in: query + name: format + schema: + type: string + enum: ["JSON", "TSV"] + default: JSON + description: Output format + Headers: + in: query + name: includeHeaders + schema: + type: string + default: true + description: Include the column headers in TSV returns + schemas: ErrorReport: @@ -383,6 +401,15 @@ 
components: nullable: true description: a URL to use to fetch the next page of data in the query. can be null if the query is not complete + Row: + type: object + properties: + cells: + type: array + items: + type: string + description: spreadsheet cell data + DatasetDescription: type: object properties: diff --git a/src/test/java/bio/terra/cda/app/flatten/FlattenTest.java b/src/test/java/bio/terra/cda/app/flatten/FlattenTest.java new file mode 100644 index 00000000..9e620e7d --- /dev/null +++ b/src/test/java/bio/terra/cda/app/flatten/FlattenTest.java @@ -0,0 +1,101 @@ +package bio.terra.cda.app.flatten; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.jayway.jsonpath.Configuration; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.Option; +import com.jayway.jsonpath.spi.json.JacksonJsonProvider; +import com.jayway.jsonpath.spi.json.JsonProvider; +import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; +import com.jayway.jsonpath.spi.mapper.MappingProvider; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.EnumSet; +import java.util.List; +import java.util.Set; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +class FlattenTest { + + static final Path TEST_FILES = Paths.get("src/test/resources/json_files"); + + @Test + public void testFlatteningUsingJsonFlattener() throws Exception { + String jsonString = Files.readString(TEST_FILES.resolve("gdc_no_files1.json")); + JsonFlattener jflat = new JsonFlattener(); + + List json2csv = jflat.json2Sheet(jsonString, "true").getJsonAsSpreadsheet(); + + assertEquals(json2csv.size(), 29); + assertTrue( + jflat + .json2Sheet(jsonString, "true") + .getUniqueFields() + .contains("member_of_research_project")); + } + + @Disabled + public void testPathList() throws Exception { + Configuration.setDefaults( + new Configuration.Defaults() { + 
private final JsonProvider jsonProvider = new JacksonJsonProvider(); + private final MappingProvider mappingProvider = new JacksonMappingProvider(); + + // @Override + public JsonProvider jsonProvider() { + return jsonProvider; + } + + // @Override + public MappingProvider mappingProvider() { + return mappingProvider; + } + + // @Override + public Set options() { + return EnumSet.noneOf(Option.class); + } + }); + + Configuration conf = + Configuration.defaultConfiguration() + .addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL) + .addOptions(Option.SUPPRESS_EXCEPTIONS); + + Configuration pathConf = + Configuration.defaultConfiguration() + .addOptions(Option.AS_PATH_LIST) + .addOptions(Option.ALWAYS_RETURN_LIST); + + String jsonString = Files.readString(TEST_FILES.resolve("gdc_no_files1.json")); + List pathList = JsonPath.using(pathConf).parse(jsonString).read("$..*"); + + for (String path : pathList) { + System.out.println(pathList); + } + } + + @Test + public void json2SheetWriter() throws Exception { + String jsonString = Files.readString(TEST_FILES.resolve("gdc_no_files1.json")); + JsonFlattener jflat = new JsonFlattener().json2Sheet(jsonString, "true"); + List objects = jflat.getJsonAsSheet(); + assertEquals(objects.size(), 29); + + List rows = jflat.getJsonAsSpreadsheet(); + assertEquals(rows.size(), 29); + } + + @Disabled + public void testCsvWriter() throws Exception { + String jsonString = Files.readString(TEST_FILES.resolve("gdc_no_files1.json")); + JsonFlattener jflat = new JsonFlattener(); + + List json2csv = jflat.json2Sheet(jsonString, "true").getJsonAsSpreadsheet(); + jflat.json2Sheet(jsonString, "true").write2csv("./jsonOutput"); + } +} diff --git a/src/test/java/bio/terra/cda/app/flatten/model/RowTest.java b/src/test/java/bio/terra/cda/app/flatten/model/RowTest.java new file mode 100644 index 00000000..7d06abc2 --- /dev/null +++ b/src/test/java/bio/terra/cda/app/flatten/model/RowTest.java @@ -0,0 +1,17 @@ +package bio.terra.cda.app.flatten.model; + +import 
org.junit.jupiter.api.Disabled; + +public class RowTest { + + static final String row_content = + "TCGA-29-2435,Homo sapiens,female,white,not reported,-28779,Alive,,,8fa35df3-f544-4c47-bdd1-e4d6fc6662be,TCGA-OV"; + + @Disabled + public void testRowCreation() throws Exception { + Object[] cells = row_content.split(","); + String row = Row.toSpreadsheetRow(cells); + + // assertEquals(row.getCells().length, 11); + } +} diff --git a/src/test/resources/csv_files/jsonOutput-7.csv b/src/test/resources/csv_files/jsonOutput-7.csv new file mode 100644 index 00000000..2eca3e01 --- /dev/null +++ b/src/test/resources/csv_files/jsonOutput-7.csv @@ -0,0 +1,29 @@ +id,species,sex,race,ethnicity,days_to_birth,vital_status,age_at_death,cause_of_death,id_1,member_of_research_project,primary_diagnosis_condition,primary_diagnosis_site,id_2,primary_diagnosis,age_at_diagnosis,morphology,stage,grade,method_of_diagnosis,identifier.system,identifier.value,subject_associated_project.0,ResearchSubject.id,ResearchSubject.member_of_research_project,ResearchSubject.primary_diagnosis_condition,ResearchSubject.primary_diagnosis_site,ResearchSubject.identifier.system,ResearchSubject.identifier.value,ResearchSubject.Diagnosis.id,ResearchSubject.Diagnosis.primary_diagnosis,ResearchSubject.Diagnosis.age_at_diagnosis,ResearchSubject.Diagnosis.morphology,ResearchSubject.Diagnosis.stage,ResearchSubject.Diagnosis.grade,ResearchSubject.Diagnosis.method_of_diagnosis,ResearchSubject.Diagnosis.identifier.system,ResearchSubject.Diagnosis.identifier.value,ResearchSubject.Diagnosis.Treatment.id,ResearchSubject.Diagnosis.Treatment.treatment_type,ResearchSubject.Diagnosis.Treatment.treatment_outcome,ResearchSubject.Diagnosis.Treatment.days_to_treatment_start,ResearchSubject.Diagnosis.Treatment.days_to_treatment_end,ResearchSubject.Diagnosis.Treatment.therapeutic_agent,ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site,ResearchSubject.Diagnosis.Treatment.treatment_effect,ResearchSubject.Diagnosis.Treatment.t
reatment_end_reason,ResearchSubject.Diagnosis.Treatment.number_of_cycles,ResearchSubject.Diagnosis.Treatment.identifier.system,ResearchSubject.Diagnosis.Treatment.identifier.value,ResearchSubject.Specimen.id,ResearchSubject.Specimen.associated_project,ResearchSubject.Specimen.age_at_collection,ResearchSubject.Specimen.primary_disease_type,ResearchSubject.Specimen.anatomical_site,ResearchSubject.Specimen.source_material_type,ResearchSubject.Specimen.specimen_type,ResearchSubject.Specimen.derived_from_specimen,ResearchSubject.Specimen.derived_from_subject,ResearchSubject.Specimen.identifier.system,ResearchSubject.Specimen.identifier.value,identifier_1.system,identifier_1.value,Diagnosis.id,Diagnosis.primary_diagnosis,Diagnosis.age_at_diagnosis,Diagnosis.morphology,Diagnosis.stage,Diagnosis.grade,Diagnosis.method_of_diagnosis,Diagnosis.identifier.system,Diagnosis.identifier.value,Diagnosis.Treatment.id,Diagnosis.Treatment.treatment_type,Diagnosis.Treatment.treatment_outcome,Diagnosis.Treatment.days_to_treatment_start,Diagnosis.Treatment.days_to_treatment_end,Diagnosis.Treatment.therapeutic_agent,Diagnosis.Treatment.treatment_anatomic_site,Diagnosis.Treatment.treatment_effect,Diagnosis.Treatment.treatment_end_reason,Diagnosis.Treatment.number_of_cycles,Diagnosis.Treatment.identifier.system,Diagnosis.Treatment.identifier.value,Specimen.id,Specimen.associated_project,Specimen.age_at_collection,Specimen.primary_disease_type,Specimen.anatomical_site,Specimen.source_material_type,Specimen.specimen_type,Specimen.derived_from_specimen,Specimen.derived_from_subject,Specimen.identifier.system,Specimen.identifier.value,identifier_2.system,identifier_2.value,Treatment.id,Treatment.treatment_type,Treatment.treatment_outcome,Treatment.days_to_treatment_start,Treatment.days_to_treatment_end,Treatment.therapeutic_agent,Treatment.treatment_anatomic_site,Treatment.treatment_effect,Treatment.treatment_end_reason,Treatment.number_of_cycles,Treatment.identifier.system,Treatment.identifier.
value +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,"GDC","TCGA-29-2435",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","GDC","8fa35df3-f544-4c47-bdd1-e4d6fc6662be",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,"dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,"GDC","dc8af98b-03cb-5817-84fa-d86a7f2df8c6",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,"dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, 
NOS","28779","8441/3",,"not reported",,,,"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c","Pharmaceutical Therapy, NOS",,,,,,,,,"GDC","ecd1e57a-8a3c-5591-9cc4-fd490f66f24c",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,"dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"68aece6e-2de6-506a-8190-7433a1f5b8eb","Radiation Therapy, NOS",,,,,,,,,"GDC","68aece6e-2de6-506a-8190-7433a1f5b8eb",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,,,,,,,,,,,,,,,,,,,,,,"a76ac995-782d-4ba3-a029-40f8d7d9642b","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Blood Derived Normal","sample","initial specimen","TCGA-29-2435","GDC","a76ac995-782d-4ba3-a029-40f8d7d9642b",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous 
Neoplasms","Ovary",,,,,,,,,,,,,,,,,,,,,,,,"c34bf82c-7b38-482a-bfe3-f2750f1a24bc","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Blood Derived Normal","portion","a76ac995-782d-4ba3-a029-40f8d7d9642b","TCGA-29-2435","GDC","c34bf82c-7b38-482a-bfe3-f2750f1a24bc",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,,,,,,,,,,,,,,,,,,,,,,"69a797dd-1083-4a83-a184-ec19f704d770","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","sample","initial specimen","TCGA-29-2435","GDC","69a797dd-1083-4a83-a184-ec19f704d770",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,,,,,,,,,,,,,,,,,,,,,,"84e1540d-c756-414e-92c9-3e0d5f45cd48","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","portion","69a797dd-1083-4a83-a184-ec19f704d770","TCGA-29-2435","GDC","84e1540d-c756-414e-92c9-3e0d5f45cd48",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not 
reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,,,,,,,,,,,,,,,,,,,,,,"49cf0c2e-45b3-4a61-97ad-8cae610313d0","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","portion","69a797dd-1083-4a83-a184-ec19f704d770","TCGA-29-2435","GDC","49cf0c2e-45b3-4a61-97ad-8cae610313d0",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,,,,,,,,,,,,,,,,,,,,,,"169f0540-bdac-416d-b239-74b4804b140b","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","portion","69a797dd-1083-4a83-a184-ec19f704d770","TCGA-29-2435","GDC","169f0540-bdac-416d-b239-74b4804b140b",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,,,,,,,,,,,,,,,,,,,,,,"3302664d-2406-4641-b54f-daf7400522b8","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","slide","169f0540-bdac-416d-b239-74b4804b140b","TCGA-29-2435","GDC","3302664d-2406-4641-b54f-daf7400522b8",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous 
Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV","8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary",,,,,,,,,,,,,,,,,,,,,,,,"b317672c-b196-428f-9c3f-077fdd24e96e","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","slide","169f0540-bdac-416d-b239-74b4804b140b","TCGA-29-2435","GDC","b317672c-b196-428f-9c3f-077fdd24e96e",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"GDC","8fa35df3-f544-4c47-bdd1-e4d6fc6662be",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,"GDC","dc8af98b-03cb-5817-84fa-d86a7f2df8c6",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c","Pharmaceutical Therapy, 
NOS",,,,,,,,,"GDC","ecd1e57a-8a3c-5591-9cc4-fd490f66f24c",,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"68aece6e-2de6-506a-8190-7433a1f5b8eb","Radiation Therapy, NOS",,,,,,,,,"GDC","68aece6e-2de6-506a-8190-7433a1f5b8eb",,,,,,,,,,,,,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"a76ac995-782d-4ba3-a029-40f8d7d9642b","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Blood Derived Normal","sample","initial specimen","TCGA-29-2435","GDC","a76ac995-782d-4ba3-a029-40f8d7d9642b",,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"c34bf82c-7b38-482a-bfe3-f2750f1a24bc","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Blood Derived Normal","portion","a76ac995-782d-4ba3-a029-40f8d7d9642b","TCGA-29-2435","GDC","c34bf82c-7b38-482a-bfe3-f2750f1a24bc",,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, 
Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"69a797dd-1083-4a83-a184-ec19f704d770","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","sample","initial specimen","TCGA-29-2435","GDC","69a797dd-1083-4a83-a184-ec19f704d770",,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"84e1540d-c756-414e-92c9-3e0d5f45cd48","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","portion","69a797dd-1083-4a83-a184-ec19f704d770","TCGA-29-2435","GDC","84e1540d-c756-414e-92c9-3e0d5f45cd48",,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"49cf0c2e-45b3-4a61-97ad-8cae610313d0","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","portion","69a797dd-1083-4a83-a184-ec19f704d770","TCGA-29-2435","GDC","49cf0c2e-45b3-4a61-97ad-8cae610313d0",,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not 
reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"169f0540-bdac-416d-b239-74b4804b140b","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","portion","69a797dd-1083-4a83-a184-ec19f704d770","TCGA-29-2435","GDC","169f0540-bdac-416d-b239-74b4804b140b",,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"3302664d-2406-4641-b54f-daf7400522b8","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","slide","169f0540-bdac-416d-b239-74b4804b140b","TCGA-29-2435","GDC","3302664d-2406-4641-b54f-daf7400522b8",,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"b317672c-b196-428f-9c3f-077fdd24e96e","TCGA-OV","-28779","Cystic, Mucinous and Serous Neoplasms",,"Primary Tumor","slide","169f0540-bdac-416d-b239-74b4804b140b","TCGA-29-2435","GDC","b317672c-b196-428f-9c3f-077fdd24e96e",,,,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"GDC","dc8af98b-03cb-5817-84fa-d86a7f2df8c6",,,,,,,,,,,, +"TCGA-29-2435","Homo sapiens","female","white","not 
reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"ecd1e57a-8a3c-5591-9cc4-fd490f66f24c","Pharmaceutical Therapy, NOS",,,,,,,,,"GDC","ecd1e57a-8a3c-5591-9cc4-fd490f66f24c" +"TCGA-29-2435","Homo sapiens","female","white","not reported","-28779","Alive",,,"8fa35df3-f544-4c47-bdd1-e4d6fc6662be","TCGA-OV","Cystic, Mucinous and Serous Neoplasms","Ovary","dc8af98b-03cb-5817-84fa-d86a7f2df8c6","Serous cystadenocarcinoma, NOS","28779","8441/3",,"not reported",,,,"TCGA-OV",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"68aece6e-2de6-506a-8190-7433a1f5b8eb","Radiation Therapy, NOS",,,,,,,,,"GDC","68aece6e-2de6-506a-8190-7433a1f5b8eb" diff --git a/src/test/resources/json_files/gdc_no_files1.json b/src/test/resources/json_files/gdc_no_files1.json new file mode 100644 index 00000000..d097b2d5 --- /dev/null +++ b/src/test/resources/json_files/gdc_no_files1.json @@ -0,0 +1 @@ +{ "id": "TCGA-29-2435", "identifier": [{ "system": "GDC", "value": "TCGA-29-2435" }], "species": "Homo sapiens", "sex": "female", "race": "white", "ethnicity": "not reported", "days_to_birth": "-28779", "subject_associated_project": ["TCGA-OV"], "vital_status": "Alive", "age_at_death": null, "cause_of_death": null, "ResearchSubject": [{ "id": "8fa35df3-f544-4c47-bdd1-e4d6fc6662be", "identifier": [{ "system": "GDC", "value": "8fa35df3-f544-4c47-bdd1-e4d6fc6662be" }], "member_of_research_project": "TCGA-OV", "primary_diagnosis_condition": "Cystic, Mucinous and Serous Neoplasms", "primary_diagnosis_site": "Ovary", "Diagnosis": [{ "id": "dc8af98b-03cb-5817-84fa-d86a7f2df8c6", "identifier": [{ "system": "GDC", "value": "dc8af98b-03cb-5817-84fa-d86a7f2df8c6" }], "primary_diagnosis": "Serous cystadenocarcinoma, NOS", 
"age_at_diagnosis": "28779", "morphology": "8441/3", "stage": null, "grade": "not reported", "method_of_diagnosis": null, "Treatment": [{ "id": "ecd1e57a-8a3c-5591-9cc4-fd490f66f24c", "identifier": [{ "system": "GDC", "value": "ecd1e57a-8a3c-5591-9cc4-fd490f66f24c" }], "treatment_type": "Pharmaceutical Therapy, NOS", "treatment_outcome": null, "days_to_treatment_start": null, "days_to_treatment_end": null, "therapeutic_agent": null, "treatment_anatomic_site": null, "treatment_effect": null, "treatment_end_reason": null, "number_of_cycles": null }, { "id": "68aece6e-2de6-506a-8190-7433a1f5b8eb", "identifier": [{ "system": "GDC", "value": "68aece6e-2de6-506a-8190-7433a1f5b8eb" }], "treatment_type": "Radiation Therapy, NOS", "treatment_outcome": null, "days_to_treatment_start": null, "days_to_treatment_end": null, "therapeutic_agent": null, "treatment_anatomic_site": null, "treatment_effect": null, "treatment_end_reason": null, "number_of_cycles": null }] }], "Specimen": [{ "id": "a76ac995-782d-4ba3-a029-40f8d7d9642b", "identifier": [{ "system": "GDC", "value": "a76ac995-782d-4ba3-a029-40f8d7d9642b" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Blood Derived Normal", "specimen_type": "sample", "derived_from_specimen": "initial specimen", "derived_from_subject": "TCGA-29-2435" }, { "id": "c34bf82c-7b38-482a-bfe3-f2750f1a24bc", "identifier": [{ "system": "GDC", "value": "c34bf82c-7b38-482a-bfe3-f2750f1a24bc" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Blood Derived Normal", "specimen_type": "portion", "derived_from_specimen": "a76ac995-782d-4ba3-a029-40f8d7d9642b", "derived_from_subject": "TCGA-29-2435" }, { "id": "69a797dd-1083-4a83-a184-ec19f704d770", "identifier": [{ "system": "GDC", "value": 
"69a797dd-1083-4a83-a184-ec19f704d770" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "sample", "derived_from_specimen": "initial specimen", "derived_from_subject": "TCGA-29-2435" }, { "id": "84e1540d-c756-414e-92c9-3e0d5f45cd48", "identifier": [{ "system": "GDC", "value": "84e1540d-c756-414e-92c9-3e0d5f45cd48" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "portion", "derived_from_specimen": "69a797dd-1083-4a83-a184-ec19f704d770", "derived_from_subject": "TCGA-29-2435" }, { "id": "49cf0c2e-45b3-4a61-97ad-8cae610313d0", "identifier": [{ "system": "GDC", "value": "49cf0c2e-45b3-4a61-97ad-8cae610313d0" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "portion", "derived_from_specimen": "69a797dd-1083-4a83-a184-ec19f704d770", "derived_from_subject": "TCGA-29-2435" }, { "id": "169f0540-bdac-416d-b239-74b4804b140b", "identifier": [{ "system": "GDC", "value": "169f0540-bdac-416d-b239-74b4804b140b" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "portion", "derived_from_specimen": "69a797dd-1083-4a83-a184-ec19f704d770", "derived_from_subject": "TCGA-29-2435" }, { "id": "3302664d-2406-4641-b54f-daf7400522b8", "identifier": [{ "system": "GDC", "value": "3302664d-2406-4641-b54f-daf7400522b8" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous 
Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "slide", "derived_from_specimen": "169f0540-bdac-416d-b239-74b4804b140b", "derived_from_subject": "TCGA-29-2435" }, { "id": "b317672c-b196-428f-9c3f-077fdd24e96e", "identifier": [{ "system": "GDC", "value": "b317672c-b196-428f-9c3f-077fdd24e96e" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "slide", "derived_from_specimen": "169f0540-bdac-416d-b239-74b4804b140b", "derived_from_subject": "TCGA-29-2435" }] }], "id_1": "8fa35df3-f544-4c47-bdd1-e4d6fc6662be", "identifier_1": [{ "system": "GDC", "value": "8fa35df3-f544-4c47-bdd1-e4d6fc6662be" }], "member_of_research_project": "TCGA-OV", "primary_diagnosis_condition": "Cystic, Mucinous and Serous Neoplasms", "primary_diagnosis_site": "Ovary", "Diagnosis": [{ "id": "dc8af98b-03cb-5817-84fa-d86a7f2df8c6", "identifier": [{ "system": "GDC", "value": "dc8af98b-03cb-5817-84fa-d86a7f2df8c6" }], "primary_diagnosis": "Serous cystadenocarcinoma, NOS", "age_at_diagnosis": "28779", "morphology": "8441/3", "stage": null, "grade": "not reported", "method_of_diagnosis": null, "Treatment": [{ "id": "ecd1e57a-8a3c-5591-9cc4-fd490f66f24c", "identifier": [{ "system": "GDC", "value": "ecd1e57a-8a3c-5591-9cc4-fd490f66f24c" }], "treatment_type": "Pharmaceutical Therapy, NOS", "treatment_outcome": null, "days_to_treatment_start": null, "days_to_treatment_end": null, "therapeutic_agent": null, "treatment_anatomic_site": null, "treatment_effect": null, "treatment_end_reason": null, "number_of_cycles": null }, { "id": "68aece6e-2de6-506a-8190-7433a1f5b8eb", "identifier": [{ "system": "GDC", "value": "68aece6e-2de6-506a-8190-7433a1f5b8eb" }], "treatment_type": "Radiation Therapy, NOS", "treatment_outcome": null, "days_to_treatment_start": null, "days_to_treatment_end": null, 
"therapeutic_agent": null, "treatment_anatomic_site": null, "treatment_effect": null, "treatment_end_reason": null, "number_of_cycles": null }] }], "Specimen": [{ "id": "a76ac995-782d-4ba3-a029-40f8d7d9642b", "identifier": [{ "system": "GDC", "value": "a76ac995-782d-4ba3-a029-40f8d7d9642b" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Blood Derived Normal", "specimen_type": "sample", "derived_from_specimen": "initial specimen", "derived_from_subject": "TCGA-29-2435" }, { "id": "c34bf82c-7b38-482a-bfe3-f2750f1a24bc", "identifier": [{ "system": "GDC", "value": "c34bf82c-7b38-482a-bfe3-f2750f1a24bc" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Blood Derived Normal", "specimen_type": "portion", "derived_from_specimen": "a76ac995-782d-4ba3-a029-40f8d7d9642b", "derived_from_subject": "TCGA-29-2435" }, { "id": "69a797dd-1083-4a83-a184-ec19f704d770", "identifier": [{ "system": "GDC", "value": "69a797dd-1083-4a83-a184-ec19f704d770" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "sample", "derived_from_specimen": "initial specimen", "derived_from_subject": "TCGA-29-2435" }, { "id": "84e1540d-c756-414e-92c9-3e0d5f45cd48", "identifier": [{ "system": "GDC", "value": "84e1540d-c756-414e-92c9-3e0d5f45cd48" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "portion", "derived_from_specimen": "69a797dd-1083-4a83-a184-ec19f704d770", "derived_from_subject": "TCGA-29-2435" }, { "id": 
"49cf0c2e-45b3-4a61-97ad-8cae610313d0", "identifier": [{ "system": "GDC", "value": "49cf0c2e-45b3-4a61-97ad-8cae610313d0" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "portion", "derived_from_specimen": "69a797dd-1083-4a83-a184-ec19f704d770", "derived_from_subject": "TCGA-29-2435" }, { "id": "169f0540-bdac-416d-b239-74b4804b140b", "identifier": [{ "system": "GDC", "value": "169f0540-bdac-416d-b239-74b4804b140b" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "portion", "derived_from_specimen": "69a797dd-1083-4a83-a184-ec19f704d770", "derived_from_subject": "TCGA-29-2435" }, { "id": "3302664d-2406-4641-b54f-daf7400522b8", "identifier": [{ "system": "GDC", "value": "3302664d-2406-4641-b54f-daf7400522b8" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "slide", "derived_from_specimen": "169f0540-bdac-416d-b239-74b4804b140b", "derived_from_subject": "TCGA-29-2435" }, { "id": "b317672c-b196-428f-9c3f-077fdd24e96e", "identifier": [{ "system": "GDC", "value": "b317672c-b196-428f-9c3f-077fdd24e96e" }], "associated_project": "TCGA-OV", "age_at_collection": "-28779", "primary_disease_type": "Cystic, Mucinous and Serous Neoplasms", "anatomical_site": null, "source_material_type": "Primary Tumor", "specimen_type": "slide", "derived_from_specimen": "169f0540-bdac-416d-b239-74b4804b140b", "derived_from_subject": "TCGA-29-2435" }], "id_2": "dc8af98b-03cb-5817-84fa-d86a7f2df8c6", "identifier_2": [{ "system": "GDC", "value": "dc8af98b-03cb-5817-84fa-d86a7f2df8c6" }], 
"primary_diagnosis": "Serous cystadenocarcinoma, NOS", "age_at_diagnosis": "28779", "morphology": "8441/3", "stage": null, "grade": "not reported", "method_of_diagnosis": null, "Treatment": [{ "id": "ecd1e57a-8a3c-5591-9cc4-fd490f66f24c", "identifier": [{ "system": "GDC", "value": "ecd1e57a-8a3c-5591-9cc4-fd490f66f24c" }], "treatment_type": "Pharmaceutical Therapy, NOS", "treatment_outcome": null, "days_to_treatment_start": null, "days_to_treatment_end": null, "therapeutic_agent": null, "treatment_anatomic_site": null, "treatment_effect": null, "treatment_end_reason": null, "number_of_cycles": null }, { "id": "68aece6e-2de6-506a-8190-7433a1f5b8eb", "identifier": [{ "system": "GDC", "value": "68aece6e-2de6-506a-8190-7433a1f5b8eb" }], "treatment_type": "Radiation Therapy, NOS", "treatment_outcome": null, "days_to_treatment_start": null, "days_to_treatment_end": null, "therapeutic_agent": null, "treatment_anatomic_site": null, "treatment_effect": null, "treatment_end_reason": null, "number_of_cycles": null }]} From cbfce858a4e3ae7beb3a843b24e1bd330fed7cb8 Mon Sep 17 00:00:00 2001 From: Frederick Kaufman Date: Thu, 24 Mar 2022 14:53:44 -0400 Subject: [PATCH 15/18] code bugs --- .../terra/cda/app/flatten/JsonFlattener.java | 59 +++++++------------ .../bio/terra/cda/app/flatten/OrderJson.java | 19 +++--- .../bio/terra/cda/app/flatten/model/Row.java | 6 +- .../terra/cda/app/flatten/FlattenTest.java | 15 ++--- .../terra/cda/app/flatten/model/RowTest.java | 12 ++-- 5 files changed, 51 insertions(+), 60 deletions(-) diff --git a/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java b/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java index ce95eddb..8cd2d4f5 100644 --- a/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java +++ b/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java @@ -31,29 +31,17 @@ */ public class JsonFlattener { - private String jsonString = null; - private List sheetMatrix = null; - private List pathList = null; - - private 
String tmp[] = null; - - private HashSet primitivePath = null; - private HashSet primitiveUniquePath = null; private List unique = null; + private String tmpPath = null; + private String regex = "(\\[[0-9]*\\]$)"; private Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE); - private JsonElement element = null; - - private String tmpPath = null; - private OrderJson makeOrder = new OrderJson(); - public JsonFlattener() {} - /** * This method does some pre processing and then calls make2D() to get the spreadsheet * representation of Json document. @@ -61,7 +49,6 @@ public JsonFlattener() {} * @return returns a JsonFlattener object */ public JsonFlattener json2Sheet(String jsonString, String includeHeaders) { - this.jsonString = jsonString; Configuration.setDefaults( new Configuration.Defaults() { @@ -96,16 +83,16 @@ public Set options() { DocumentContext parse = null; - sheetMatrix = new ArrayList(); + sheetMatrix = new ArrayList<>(); - element = new JsonParser().parse(this.jsonString); + JsonElement element = new JsonParser().parse(jsonString); - pathList = JsonPath.using(pathConf).parse(this.jsonString).read("$..*"); + List pathList = JsonPath.using(pathConf).parse(jsonString).read("$..*"); - parse = JsonPath.using(conf).parse(this.jsonString); + parse = JsonPath.using(conf).parse(jsonString); - primitivePath = new LinkedHashSet(); - primitiveUniquePath = new LinkedHashSet(); + HashSet primitivePath = new LinkedHashSet<>(); + HashSet primitiveUniquePath = new LinkedHashSet<>(); for (String o : pathList) { Object tmp = parse.read(o); @@ -132,7 +119,7 @@ public Set options() { Matcher m = pattern.matcher(o); if (m.find()) { - tmp = o.replace("$", "").split("(\\[[0-9]*\\]$)"); + String[] tmp = o.replace("$", "").split("(\\[[0-9]*\\]$)"); tmp[0] = tmp[0].replaceAll("(\\[[0-9]*\\])", ""); primitiveUniquePath.add( (tmp[0] + m.group()) @@ -152,7 +139,7 @@ public Set options() { } } - unique = new ArrayList(primitiveUniquePath); + unique = new 
ArrayList<>(primitiveUniquePath); // choose to suppress the header row if we are aggregating multiple input results downstream. if (includeHeaders.equals("true")) { @@ -170,8 +157,8 @@ public Set options() { // adding all the content of csv sheetMatrix.add(make2D(new Object[unique.size()], new Object[unique.size()], element, "$")); - Object last[] = sheetMatrix.get(sheetMatrix.size() - 1); - Object secondLast[] = sheetMatrix.get(sheetMatrix.size() - 2); + Object[] last = sheetMatrix.get(sheetMatrix.size() - 1); + Object[] secondLast = sheetMatrix.get(sheetMatrix.size() - 2); boolean delete = true; @@ -281,7 +268,7 @@ private Object[] make2D(Object[] current, Object[] old, JsonElement element, Str Matcher m = pattern.matcher(tmpPath); if (m.find()) { - String tmp1[] = tmpPath.replace("$", "").split("(\\[[0-9]*\\]$)"); + String[] tmp1 = tmpPath.replace("$", "").split("(\\[[0-9]*\\]$)"); tmp1[0] = tmp1[0].replaceAll("(\\[[0-9]*\\])", ""); tmpPath = ((tmp1[0] + m.group()) @@ -342,14 +329,12 @@ private Object[] make2D(Object[] current, Object[] old, JsonElement element, Str private boolean isInnerArray(JsonElement element) { for (Map.Entry entry : element.getAsJsonObject().entrySet()) { - if (entry.getValue().isJsonArray()) { - if (entry.getValue().getAsJsonArray().size() > 0) - for (JsonElement checkPrimitive : entry.getValue().getAsJsonArray()) { - - if (checkPrimitive.isJsonObject()) { - return true; - } + if (entry.getValue().isJsonArray() && entry.getValue().getAsJsonArray().size() > 0) { + for (JsonElement checkPrimitive : entry.getValue().getAsJsonArray()) { + if (checkPrimitive.isJsonObject()) { + return true; } + } } } return false; @@ -363,7 +348,7 @@ private boolean isInnerArray(JsonElement element) { * @return JFlat * @throws Exception */ - public JsonFlattener headerSeparator(String separator) throws Exception { + public JsonFlattener headerSeparator(String separator) throws IllegalArgumentException { try { int sheetMatrixLen = 
this.sheetMatrix.get(0).length; @@ -380,7 +365,7 @@ public JsonFlattener headerSeparator(String separator) throws Exception { } } catch (NullPointerException nullex) { - throw new Exception( + throw new IllegalArgumentException( "The JSON document hasn't been transformed yet. Try using json2Sheet() before using headerSeparator"); } return this; @@ -401,7 +386,7 @@ public List getJsonAsSheet() { * @return List */ public List getJsonAsSpreadsheet() { - List spreadsheet = new ArrayList(); + List spreadsheet = new ArrayList<>(); for (Object[] sheetRow : this.sheetMatrix) { spreadsheet.add(Row.toSpreadsheetRow(sheetRow)); } @@ -453,7 +438,7 @@ public void write2csv(String destination, char delimiter) */ public String write2csv(char delimiter) { boolean comma = false; - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); for (Object[] o : this.sheetMatrix) { comma = false; for (Object t : o) { diff --git a/src/main/java/bio/terra/cda/app/flatten/OrderJson.java b/src/main/java/bio/terra/cda/app/flatten/OrderJson.java index 2a1fe639..f869884e 100644 --- a/src/main/java/bio/terra/cda/app/flatten/OrderJson.java +++ b/src/main/java/bio/terra/cda/app/flatten/OrderJson.java @@ -6,11 +6,14 @@ import java.lang.reflect.Type; import java.util.LinkedHashMap; import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class OrderJson { - + private static final Logger logger = LoggerFactory.getLogger(OrderJson.class); Type type = new TypeToken>() {}.getType(); + // LinkedHashMap to maintain insertion order Map origMap = null; Map jsonPrimitive = null; @@ -25,12 +28,9 @@ public OrderJson() { public JsonElement orderJson(JsonElement element) { - // LinkedHashMap to maintain insertion order - origMap = new LinkedHashMap(); - - jsonPrimitive = new LinkedHashMap(); - jsonArray = new LinkedHashMap(); - jsonObject = new LinkedHashMap(); + jsonPrimitive = new LinkedHashMap<>(); + jsonArray = new LinkedHashMap<>(); + jsonObject = 
new LinkedHashMap<>(); // converting JsonElement to Map origMap = gson.fromJson(element, type); @@ -40,8 +40,7 @@ public JsonElement orderJson(JsonElement element) { try { // adding check if value of key in json is null - if (entry.getValue() == null - || entry.getValue().getClass().getSimpleName().equals("ArrayList")) { + if (entry.getValue() == null || entry.getValue().getClass().isInstance("ArrayList")) { // if Object is of type ArrayList push it to jsonArray Map jsonArray.put(entry.getKey(), entry.getValue()); @@ -52,7 +51,7 @@ public JsonElement orderJson(JsonElement element) { jsonPrimitive.put(entry.getKey(), entry.getValue()); } } catch (Exception ex) { - ex.printStackTrace(); + logger.error(ex.getMessage()); } } diff --git a/src/main/java/bio/terra/cda/app/flatten/model/Row.java b/src/main/java/bio/terra/cda/app/flatten/model/Row.java index 79c03147..a8a2ecc9 100644 --- a/src/main/java/bio/terra/cda/app/flatten/model/Row.java +++ b/src/main/java/bio/terra/cda/app/flatten/model/Row.java @@ -2,11 +2,15 @@ public class Row { + private Row() {} + public static String toSpreadsheetRow(Object[] cellData) { StringBuilder builder = new StringBuilder(); for (int i = 0; i < cellData.length - 1; i++) { if (cellData[i] != null) { - cellData[i].toString().replaceAll(",", "\\,"); + if (cellData[i].toString().contains(",")) { + cellData[i] = cellData[i].toString().replace(",", "\\,"); + } builder.append(cellData[i].toString() + "\t"); } else { builder.append("\t"); diff --git a/src/test/java/bio/terra/cda/app/flatten/FlattenTest.java b/src/test/java/bio/terra/cda/app/flatten/FlattenTest.java index 9e620e7d..80d589c5 100644 --- a/src/test/java/bio/terra/cda/app/flatten/FlattenTest.java +++ b/src/test/java/bio/terra/cda/app/flatten/FlattenTest.java @@ -24,13 +24,13 @@ class FlattenTest { static final Path TEST_FILES = Paths.get("src/test/resources/json_files"); @Test - public void testFlatteningUsingJsonFlattener() throws Exception { + void 
testFlatteningUsingJsonFlattener() throws Exception { String jsonString = Files.readString(TEST_FILES.resolve("gdc_no_files1.json")); JsonFlattener jflat = new JsonFlattener(); List json2csv = jflat.json2Sheet(jsonString, "true").getJsonAsSpreadsheet(); - assertEquals(json2csv.size(), 29); + assertEquals(30, json2csv.size()); assertTrue( jflat .json2Sheet(jsonString, "true") @@ -39,7 +39,7 @@ public void testFlatteningUsingJsonFlattener() throws Exception { } @Disabled - public void testPathList() throws Exception { + void testPathList() throws Exception { Configuration.setDefaults( new Configuration.Defaults() { private final JsonProvider jsonProvider = new JacksonJsonProvider(); @@ -80,18 +80,19 @@ public Set options() { } @Test - public void json2SheetWriter() throws Exception { + void testJson2SheetWriter() throws Exception { String jsonString = Files.readString(TEST_FILES.resolve("gdc_no_files1.json")); JsonFlattener jflat = new JsonFlattener().json2Sheet(jsonString, "true"); List objects = jflat.getJsonAsSheet(); - assertEquals(objects.size(), 29); + assertEquals(30, objects.size()); List rows = jflat.getJsonAsSpreadsheet(); - assertEquals(rows.size(), 29); + assertEquals(30, rows.size()); } + // Move to integration tests @Disabled - public void testCsvWriter() throws Exception { + void testCsvWriter() throws Exception { String jsonString = Files.readString(TEST_FILES.resolve("gdc_no_files1.json")); JsonFlattener jflat = new JsonFlattener(); diff --git a/src/test/java/bio/terra/cda/app/flatten/model/RowTest.java b/src/test/java/bio/terra/cda/app/flatten/model/RowTest.java index 7d06abc2..2f2cc779 100644 --- a/src/test/java/bio/terra/cda/app/flatten/model/RowTest.java +++ b/src/test/java/bio/terra/cda/app/flatten/model/RowTest.java @@ -1,17 +1,19 @@ package bio.terra.cda.app.flatten.model; -import org.junit.jupiter.api.Disabled; +import static org.junit.jupiter.api.Assertions.assertTrue; -public class RowTest { +import org.junit.jupiter.api.Test; + +class 
RowTest { static final String row_content = "TCGA-29-2435,Homo sapiens,female,white,not reported,-28779,Alive,,,8fa35df3-f544-4c47-bdd1-e4d6fc6662be,TCGA-OV"; - @Disabled - public void testRowCreation() throws Exception { + @Test + void testRowCreation() throws Exception { Object[] cells = row_content.split(","); String row = Row.toSpreadsheetRow(cells); - // assertEquals(row.getCells().length, 11); + assertTrue(row.contains("Homo sapiens")); } } From 1a576937429466616768a2e701d25209e44af7da Mon Sep 17 00:00:00 2001 From: Frederick Kaufman Date: Fri, 25 Mar 2022 10:29:39 -0400 Subject: [PATCH 16/18] code smells 2 --- .../terra/cda/app/flatten/JsonFlattener.java | 32 +++++-------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java b/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java index 8cd2d4f5..faa653d1 100644 --- a/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java +++ b/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java @@ -155,7 +155,7 @@ public Set options() { } // adding all the content of csv - sheetMatrix.add(make2D(new Object[unique.size()], new Object[unique.size()], element, "$")); + sheetMatrix.add(make2D(new Object[unique.size()], element, "$")); Object[] last = sheetMatrix.get(sheetMatrix.size() - 1); Object[] secondLast = sheetMatrix.get(sheetMatrix.size() - 2); @@ -187,15 +187,14 @@ public Set options() { /** * This function transforms the JSON document to its equivalent 2D representation. * - * @param current its the logical current row of the Json being processed * @param old it keeps the old row which is always assigned to the current row. * @param element this keeps the part of json being parsed to 2D. * @param path this mantains the path of the Json element being processed. 
* @return */ - private Object[] make2D(Object[] current, Object[] old, JsonElement element, String path) { + private Object[] make2D(Object[] old, JsonElement element, String path) { - current = old.clone(); + Object[] current = old.clone(); boolean gotArray = false; @@ -244,17 +243,11 @@ private Object[] make2D(Object[] current, Object[] old, JsonElement element, Str } else if (entry.getValue().isJsonObject()) { current = make2D( - new Object[unique.size()], - current, - entry.getValue().getAsJsonObject(), - path + "['" + entry.getKey() + "']"); + current, entry.getValue().getAsJsonObject(), path + "['" + entry.getKey() + "']"); } else if (entry.getValue().isJsonArray()) { current = make2D( - new Object[unique.size()], - current, - entry.getValue().getAsJsonArray(), - path + "['" + entry.getKey() + "']"); + current, entry.getValue().getAsJsonArray(), path + "['" + entry.getKey() + "']"); } } @@ -297,21 +290,12 @@ private Object[] make2D(Object[] current, Object[] old, JsonElement element, Str if (tmp.isJsonObject()) { gotArray = isInnerArray(tmp); - sheetMatrix.add( - make2D( - new Object[unique.size()], - current, - tmp.getAsJsonObject(), - path + "[" + arrIndex + "]")); + sheetMatrix.add(make2D(current, tmp.getAsJsonObject(), path + "[" + arrIndex + "]")); if (gotArray) { sheetMatrix.remove(sheetMatrix.size() - 1); } } else if (tmp.isJsonArray()) { - make2D( - new Object[unique.size()], - current, - tmp.getAsJsonArray(), - path + "[" + arrIndex + "]"); + make2D(current, tmp.getAsJsonArray(), path + "[" + arrIndex + "]"); } } arrIndex++; @@ -360,7 +344,7 @@ public JsonFlattener headerSeparator(String separator) throws IllegalArgumentExc .get(0)[I] .toString() .replaceFirst("^\\/", "") - .replaceAll(".", separator) + .replaceAll("\\.", separator) .trim(); } From c703bcc46fd3ae38c4d10e9d0513cc7f20299c7a Mon Sep 17 00:00:00 2001 From: Frederick Kaufman Date: Fri, 25 Mar 2022 12:22:12 -0400 Subject: [PATCH 17/18] code smells 3 --- 
src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java b/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java index faa653d1..f3d28c3f 100644 --- a/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java +++ b/src/main/java/bio/terra/cda/app/flatten/JsonFlattener.java @@ -344,7 +344,7 @@ public JsonFlattener headerSeparator(String separator) throws IllegalArgumentExc .get(0)[I] .toString() .replaceFirst("^\\/", "") - .replaceAll("\\.", separator) + .replace("\\.", separator) .trim(); } From 0437f6b70df604833c0db499406e62ff5db98d81 Mon Sep 17 00:00:00 2001 From: Dion Boles <82461519+dionboles-asym@users.noreply.github.com> Date: Fri, 1 Apr 2022 14:58:04 -0400 Subject: [PATCH 18/18] fixed json results error 2d array returned when expected 1d array --- .../cda/app/controller/QueryApiController.java | 3 ++- .../bio/terra/cda/app/service/QueryService.java | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java index a38ffa1e..081346ec 100644 --- a/src/main/java/bio/terra/cda/app/controller/QueryApiController.java +++ b/src/main/java/bio/terra/cda/app/controller/QueryApiController.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.Collections; import java.util.Set; import javax.servlet.http.HttpServletRequest; import javax.validation.Valid; @@ -67,7 +68,7 @@ public ResponseEntity query( queryService.getQueryResults(id, offset, limit, format, includeHeaders); QueryResponseData response = new QueryResponseData() - .result(result.items) + .result(Collections.unmodifiableList(result.items)) .totalRowCount(result.totalRowCount) .querySql(result.querySql); int nextPage = result.items.size() + limit; 
diff --git a/src/main/java/bio/terra/cda/app/service/QueryService.java b/src/main/java/bio/terra/cda/app/service/QueryService.java index a3b39f7f..0a152080 100644 --- a/src/main/java/bio/terra/cda/app/service/QueryService.java +++ b/src/main/java/bio/terra/cda/app/service/QueryService.java @@ -148,12 +148,12 @@ public void setBigQuery(BigQuery bigQuery) { this.bigQuery = bigQuery; } - public static class QueryResult { - public final List items; + public static class QueryResult { + public final List items; public final Long totalRowCount; public final String querySql; - QueryResult(List items, Long totalRowCount, String querySql) { + QueryResult(List items, Long totalRowCount, String querySql) { this.items = new ArrayList<>(items); this.totalRowCount = totalRowCount; this.querySql = querySql; @@ -204,7 +204,7 @@ private QueryResult getJobResults( List spreadsheet = new ArrayList<>(); JsonFlattener jflat = new JsonFlattener(); int bqResults = 0; - for (JsonNode jsonRow : jsonData) { + for (var jsonRow : jsonData) { if (bqResults > 0) { includeHeaders = "false"; } @@ -216,14 +216,14 @@ private QueryResult getJobResults( logQuery(queryJob, jsonData); - return new QueryResult(spreadsheet, result.getTotalRows(), getSqlFromJob(queryJob)); + return new QueryResult(spreadsheet, result.getTotalRows(), getSqlFromJob(queryJob)); } logQuery(queryJob, jsonData); - List jsonRowData = new ArrayList<>(); - jsonRowData.add(jsonData); - return new QueryResult(jsonRowData, result.getTotalRows(), getSqlFromJob(queryJob)); +// List jsonRowData = new ArrayList<>(); +// jsonRowData.add(jsonData); + return new QueryResult(jsonData, result.getTotalRows(), getSqlFromJob(queryJob)); } catch (InterruptedException e) { currentThread().interrupt();