From 6f595e22295d22e6533f865e09ac0e9897c32ce5 Mon Sep 17 00:00:00 2001 From: Dion Boles <82461519+dionboles-asym@users.noreply.github.com> Date: Wed, 22 Nov 2023 13:24:23 -0500 Subject: [PATCH 01/13] Updated unique terms to work with previous functionality, and fixed a pagination bug (#221) * Updated unique terms to work with previous functionality, and fixed a pagination bug * updated test with new name and removed commented code * remove the test on remote * Updated descriptions for unique terms * updated show_counts to state that is nonfunctional --- cdapython/Paginator.py | 4 +- cdapython/results/page_result.py | 7 ++- cdapython/utils/utility.py | 55 ++++++++++++++----- tests/test_C_integration.py | 2 +- ...rate_using_pandas_outside_of_cda_python.py | 19 +++++++ tests/test_get_all_reset_CD-636.py | 8 +-- tests/test_unique_get_all.py | 2 +- tests/test_unique_terms_check.py | 10 +--- tests/test_unique_terms_page_bug.py | 12 ++++ tests/test_unqine_terms_get_all.py | 2 +- tests/test_url_next2.py | 5 +- tests/unique_term_str.py | 13 ----- tests/unique_terms_page_bug.py | 9 --- 13 files changed, 89 insertions(+), 59 deletions(-) create mode 100644 tests/test_demonstrate_using_pandas_outside_of_cda_python.py create mode 100644 tests/test_unique_terms_page_bug.py delete mode 100644 tests/unique_term_str.py delete mode 100644 tests/unique_terms_page_bug.py diff --git a/cdapython/Paginator.py b/cdapython/Paginator.py index 6ee9b3c1..6b8de6e6 100644 --- a/cdapython/Paginator.py +++ b/cdapython/Paginator.py @@ -100,8 +100,8 @@ def _do_next(self: Paginator) -> Union[DataFrame, List[Any], Result, None]: advance=self.result.count, refresh=True, ) - if not self.result.has_next_page: - self.stopped = True + if self.result.has_next_page == False: + self.stopped = True if self.output != "": return self._return_result() return self.result diff --git a/cdapython/results/page_result.py b/cdapython/results/page_result.py index fa3c3b9d..ece0ac61 100644 --- a/cdapython/results/page_result.py +++ b/cdapython/results/page_result.py @@ -257,8 +257,11 @@ def next_page( include_total_count=True, show_term_count=show_term_count, ) - self.total_row_count = next_result.total_row_count - return next_result + + if next_result.total_row_count is not None: + self.total_row_count = next_result.total_row_count + return next_result + return None def prev_page( self, diff --git a/cdapython/utils/utility.py b/cdapython/utils/utility.py index 0f327d12..f91e9149 100644 --- a/cdapython/utils/utility.py +++ b/cdapython/utils/utility.py @@ -17,7 +17,7 @@ from cdapython.exceptions.custom_exception import HTTP_ERROR_API, HTTP_ERROR_SERVICE from cdapython.results.columns_result import ColumnsResult from cdapython.results.factories.collect_result import CollectResult -from cdapython.results.page_result import get_query_result +from cdapython.results.page_result import Paged_Result, get_query_result from cdapython.results.string_result import StringResult from cdapython.utils.Cda_Configuration import CdaConfiguration @@ -101,15 +101,34 @@ def get_table_version() -> str: def unique_terms( col_name: str, system: str = "", -) -> Q: + offset: int = 0, + host: Optional[str] = None, + verify: Optional[bool] = None, + async_req: Optional[bool] = True, + show_sql: bool = False, + show_counts: bool = False, + show_term_count: bool = False, + verbose: bool = True, + limit: int = 100, +) -> Paged_Result: + # TODO : Should the value be changed? The "show_term_count" parameter was copied from Swagger-generated code. Is this description sufficient? """ Show all unique terms for a given column. Args: - col_name (str): _description_ - system (str, optional): _description_. Defaults to "". + col_name (str): This is the default way to search for a unique term from the CDA service API. + system (str, optional): his is an optional parameter used to filter the search values by data center, such as "GDC", "IDC", "PDC", or "CDS". Defaults to "". + offset (int, optional): The number of entries to skip. Defaults to 0. + host (Optional[str], optional): This is where the user can set a host for a different server. Defaults to None. + verify (Optional[bool], optional): This will send a request to the cda server without verifying the SSL Cert Verification. Defaults to None. + async_req (Optional[bool], optional): Execute request asynchronously. Defaults to True. + show_sql (bool, optional): This will show the sql returned from the server. Defaults to False. + show_counts (bool, optional): Currently, the functionality is nonfunctional and pending further investigation to determine if it is needed: This will show or hide the count. Defaults to False. + show_term_count (bool, optional): Show the number of occurrences for each value. Defaults to False. + verbose (bool, optional): This will hide or show values that are automatic printed when Q runs. Defaults to True. + limit (int, optional): the numbers of entries to return per page of data. Defaults to 100. Returns: - Q: _description_ + Paged_Result """ # cda_client_obj.select_header_content_type(["text/plain"]) if system: @@ -117,7 +136,17 @@ def unique_terms( q_object._set_system(system) else: q_object = Q(col_name).unique_terms - return q_object + return q_object.run( + offset=offset, + limit=limit, + host=host, + verify=verify, + show_sql=show_sql, + show_count=show_counts, + show_term_count=show_term_count, + verbose=verbose, + async_call=async_req, + ) def columns( @@ -131,14 +160,12 @@ def columns( """ The Columns method displays all searchable columns in the CDA. Args: - version (Optional[str], optional): _description_. Defaults to None. - host (Optional[str], optional): _description_. Defaults to None. - table (Optional[str], optional): _description_. Defaults to None. - verify (Optional[bool], optional): _description_. Defaults to None. - async_req (Optional[bool], optional): _description_. Defaults to True. - show_sql (bool, optional): _description_. Defaults to False. - verbose (bool, optional): _description_. Defaults to True. - description (bool, optional): _description_. Defaults to True. + host (Optional[str], optional): This is where the user can set a host for a different server. Defaults to None. + verify (Optional[bool], optional): This will send a request to the cda server without verifying the SSL Cert Verification. Defaults to None. + async_req (Optional[bool], optional): Execute request asynchronously. Defaults to True. + show_sql (bool, optional): This will show the sql returned from the server. Defaults to False. + verbose (bool, optional): This will hide or show values that are automatic printed when Q runs. Defaults to True. + description (bool, optional): This parameter will return a description from the server of the columns. Defaults to True. Returns: Optional[ColumnsResult]: _description_ diff --git a/tests/test_C_integration.py b/tests/test_C_integration.py index 87a47365..188bf820 100644 --- a/tests/test_C_integration.py +++ b/tests/test_C_integration.py @@ -10,7 +10,7 @@ def test_basic_integration() -> None: def test_unique_terms() -> None: - terms = unique_terms("sex", "GDC").run(host=host, show_term_count=True) + terms = unique_terms(col_name="sex", system="GDC", host=host, show_term_count=True) list_terms = terms.to_list() flat_terms = [list(i.values())[0] for i in list_terms] assert "female" in flat_terms diff --git a/tests/test_demonstrate_using_pandas_outside_of_cda_python.py b/tests/test_demonstrate_using_pandas_outside_of_cda_python.py new file mode 100644 index 00000000..649aeaa4 --- /dev/null +++ b/tests/test_demonstrate_using_pandas_outside_of_cda_python.py @@ -0,0 +1,19 @@ +from pandas import json_normalize +from pandas.testing import assert_frame_equal +from cdapython import unique_terms, Q + + +def test_demonstrate_list_to_df_with_search_replacement(): + # new way + df = json_normalize( + data=unique_terms(col_name="primary_diagnosis_site", show_counts=True).to_list() + ) + new_way = df.loc[ + df["primary_diagnosis_site"].str.contains("gland", case=False, na=False) + ].reset_index(drop=True) + # old way + old_way = unique_terms("primary_diagnosis_site", show_counts=True).to_dataframe( + search_fields="primary_diagnosis_site", search_value="gland" + ) + + assert_frame_equal(new_way, old_way) diff --git a/tests/test_get_all_reset_CD-636.py b/tests/test_get_all_reset_CD-636.py index 734e0b04..ea9c8024 100644 --- a/tests/test_get_all_reset_CD-636.py +++ b/tests/test_get_all_reset_CD-636.py @@ -11,7 +11,7 @@ def test_data_type_get_all(): - data_type = unique_terms("data_type").run(host=host, limit=10) + data_type = unique_terms("data_type", host=host, limit=10) assert len(data_type.to_dataframe()) == 10 all_data_type = data_type.get_all(limit=20) @@ -28,16 +28,14 @@ def test_data_type_get_all(): def test_data_type_show_term_count(): - data_type = unique_terms("data_type").run(host=host).get_all(show_term_count=True) + data_type = unique_terms("data_type", host=host).get_all(show_term_count=True) assert "count" in data_type.to_dataframe() @pytest.mark.skip(reason="CD-650 Backend always returns counts") def test_data_type_show_term_count_false(): with pytest.raises(KeyError): - data_type = ( - unique_terms("data_type").run(host=host).get_all(show_term_count=False) - ) + data_type = unique_terms("data_type", host=host).get_all(show_term_count=False) df = data_type.to_dataframe()["count"] assert df is None diff --git a/tests/test_unique_get_all.py b/tests/test_unique_get_all.py index 3e7f4d97..d61e535d 100644 --- a/tests/test_unique_get_all.py +++ b/tests/test_unique_get_all.py @@ -3,7 +3,7 @@ def test_data_type_get_all(): - data_type = unique_terms("data_type").run(host=host, limit=10) + data_type = unique_terms("data_type", host=host, limit=10) assert len(data_type.to_dataframe()) == 10 all_data_type = data_type.get_all(limit=20) diff --git a/tests/test_unique_terms_check.py b/tests/test_unique_terms_check.py index 34a160fa..77132035 100644 --- a/tests/test_unique_terms_check.py +++ b/tests/test_unique_terms_check.py @@ -3,13 +3,9 @@ def test_unique_terms_convert() -> None: - d = ( - unique_terms( - col_name="species", - ) - .run() - .to_dataframe() - ) + d = unique_terms( + col_name="species", + ).to_dataframe() test_unique_terms_convert() diff --git a/tests/test_unique_terms_page_bug.py b/tests/test_unique_terms_page_bug.py new file mode 100644 index 00000000..3eb15d20 --- /dev/null +++ b/tests/test_unique_terms_page_bug.py @@ -0,0 +1,12 @@ +from cdapython import unique_terms +from tests.global_settings import host, project + + +def test_unique_paginator(): + projlist = [] + for i in unique_terms( + col_name="sex", + host=host, + ).paginator(to_list=True): + projlist.extend(i) + assert len(projlist) > 0 diff --git a/tests/test_unqine_terms_get_all.py b/tests/test_unqine_terms_get_all.py index af7b899b..2e60e89b 100644 --- a/tests/test_unqine_terms_get_all.py +++ b/tests/test_unqine_terms_get_all.py @@ -34,5 +34,5 @@ @mock.patch("cdapython.unique_terms", return_value=fake_result) def test_unique_terms_get_all(_: Any) -> None: - terms_list = unique_terms("sex").run().get_all().to_list() + terms_list = unique_terms("sex").get_all().to_list() assert len(terms_list) != 0 diff --git a/tests/test_url_next2.py b/tests/test_url_next2.py index 62ca46b4..4affa295 100644 --- a/tests/test_url_next2.py +++ b/tests/test_url_next2.py @@ -1,12 +1,9 @@ -# pytest: skip -# This test file will be ignored by pytest import pytest from cdapython import Q, unique_terms from tests.global_settings import host -# @pytest.mark.skip(reason="currently total row count not being returned CD-610") def test_total_count_for_subject_id(): - u_sex = unique_terms("subject_id").run(host=host) + u_sex = unique_terms("subject_id", host=host) assert u_sex.total_row_count != 0 diff --git a/tests/unique_term_str.py b/tests/unique_term_str.py deleted file mode 100644 index df3cb3dd..00000000 --- a/tests/unique_term_str.py +++ /dev/null @@ -1,13 +0,0 @@ -from cdapython import unique_terms, Q - -Q.set_default_project_dataset("broad-dsde-dev.cda_dev") -Q.set_host_url("https://cancerdata.dsde-dev.broadinstitute.org/") -Q.get_host_url() -# from tests.global_settings import localhost - - -print( - unique_terms("primary_diagnosis_site", show_counts=True).to_dataframe( - search_fields="*", search_value="gland" - ) -) diff --git a/tests/unique_terms_page_bug.py b/tests/unique_terms_page_bug.py deleted file mode 100644 index 42db03fa..00000000 --- a/tests/unique_terms_page_bug.py +++ /dev/null @@ -1,9 +0,0 @@ -from cdapython import unique_terms -from tests.global_settings import host, project - -projlist = [] -for i in unique_terms("file_associated_project", host=host, table=project).paginator( - to_list=True -): - projlist.extend(i) -print(len(projlist)) From 75617ac21d4fd24bfd0ce8595ffcd0a226742ebd Mon Sep 17 00:00:00 2001 From: Dion Boles Date: Mon, 27 Nov 2023 13:37:03 -0500 Subject: [PATCH 02/13] Removed "show_term_count" and replaced it with "show_counts" to revert back to the expected values from the documentation. Additionally, updated the "paginator" and "get all" functions to accept "show_counts" and return the proper values for unique terms --- cdapython/Paginator.py | 6 +- cdapython/Q.py | 41 +++++++++---- .../factories/booleanquery/boolean_query.py | 2 +- cdapython/factories/count.py | 4 +- cdapython/factories/diagnosis/count.py | 4 +- cdapython/factories/diagnosis/diagnosis.py | 2 +- cdapython/factories/file.py | 2 +- cdapython/factories/file_count.py | 4 +- cdapython/factories/mutations/count.py | 16 +++-- cdapython/factories/mutations/mutations.py | 2 +- cdapython/factories/research_subject/count.py | 16 +++-- cdapython/factories/research_subject/file.py | 2 +- .../factories/research_subject/file_count.py | 16 +++-- .../research_subject/research_subject.py | 2 +- cdapython/factories/specimen/count.py | 4 +- cdapython/factories/specimen/file.py | 2 +- cdapython/factories/specimen/file_count.py | 4 +- cdapython/factories/specimen/specimen.py | 2 +- cdapython/factories/subject/count.py | 4 +- cdapython/factories/subject/file.py | 2 +- cdapython/factories/subject/file_count.py | 4 +- cdapython/factories/subject/subject.py | 2 +- cdapython/factories/treatment/count.py | 4 +- cdapython/factories/treatment/treatment.py | 2 +- .../unique_terms/unique_terms_endpoint.py | 6 +- cdapython/results/base.py | 2 - cdapython/results/columns_result.py | 2 - cdapython/results/factories/collect_result.py | 3 - .../results/factories/not_paginated_result.py | 2 - cdapython/results/page_result.py | 28 +++++---- cdapython/results/result.py | 2 - cdapython/results/string_result.py | 2 - cdapython/utils/Qconfig.py | 4 ++ cdapython/utils/utility.py | 11 +--- tests/Q_test_is.py | 1 - tests/async_strem_test.py | 2 +- tests/drs_id.py | 12 ++-- tests/fake_result.py | 4 -- tests/filter_test_error.py | 2 +- tests/jay_error.py | 2 +- tests/long_query_in.py | 2 +- tests/supermock.py | 2 - tests/test_C_integration.py | 2 +- tests/test_D_api_call.py | 1 - tests/test_E_ssl.py | 1 - tests/test_Q_count.py | 1 - tests/test_Q_not_like.py | 1 - tests/test_age_at_collection.py | 1 - tests/test_bigDownTheHouse.py | 1 - tests/test_checking_IN.py | 1 - tests/test_checking_IN_query_op.py | 1 - tests/test_donvan_error.py | 1 - tests/test_get_all_reset_CD-636.py | 8 +-- tests/test_global_counts.py | 1 - tests/test_in_pare.py | 1 - tests/test_kidney.py | 1 - tests/test_ssl_cert_checks.py | 1 - tests/test_unique_terms_page_bug.py | 59 +++++++++++++++++-- tests/test_unqine_terms_get_all.py | 1 - tests/testing_researchsubject_count.py | 1 - tests/testing_subject_count_copy.py | 1 - 61 files changed, 165 insertions(+), 158 deletions(-) diff --git a/cdapython/Paginator.py b/cdapython/Paginator.py index 6b8de6e6..07bcc425 100644 --- a/cdapython/Paginator.py +++ b/cdapython/Paginator.py @@ -40,7 +40,7 @@ def __init__( limit: int, format_type: str = "JSON", show_bar: bool = False, - show_term_count: bool = False, + show_counts: bool = False, ) -> None: self.result: Union[Paged_Result, StringResult] = result self.to_df: bool = to_df @@ -51,7 +51,7 @@ def __init__( self.output: str = output self.total_result: int = 0 self.progress_dirty: bool = False - self.show_term_count: bool = show_term_count + self.show_counts: bool = show_counts self.progress: Progress = Progress( TextColumn(text_format="[progress.description]{task.description}"), BarColumn(), @@ -85,7 +85,7 @@ def _do_next(self: Paginator) -> Union[DataFrame, List[Any], Result, None]: if self.result.has_next_page or not self.stopped: try: tmp_result = self.result.next_page( - limit=self.limit, show_term_count=self.show_term_count + limit=self.limit, show_counts=self.show_counts ) if tmp_result: self.result = tmp_result diff --git a/cdapython/Q.py b/cdapython/Q.py index 15abf789..bdbc7f95 100644 --- a/cdapython/Q.py +++ b/cdapython/Q.py @@ -152,6 +152,7 @@ def __init__( self._system = "" self.limit = None self.offset = None + self.show_counts = None if len(args) == 1: if args[0] is None: raise RuntimeError("Q statement parse error") @@ -301,6 +302,23 @@ def get_table(self) -> str: def get_verbose(self) -> bool: return self._config.verbose + def get_counts(self) -> bool: + return self._config.show_counts + + def set_counts(self, show_counts: bool) -> Q: + """ + this will set the private propey _verbose + Args: + value (bool) + + Returns: + Q + """ + config = self._config.copy_config() + config.show_counts = show_counts + self.show_counts = show_counts + return self.__class__(self.query, config=config) + def set_verbose(self, value: bool) -> Q: """ this will set the private propey _verbose @@ -568,7 +586,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: Optional[bool], + show_counts: Optional[bool], ) -> PagedResponseData: """ Call the endpoint to start the job for data collection. @@ -579,7 +597,7 @@ def _call_endpoint( limit (int): _description_ async_req (bool): _description_ include_total_count (bool): _description_ - show_term_frequency (Optional[bool]): _description_ + show_counts (Optional[bool]): _description_ Returns: PagedResponseData: _description_ @@ -592,7 +610,7 @@ def _call_endpoint( offset=offset, async_req=async_req, include_total_count=include_total_count, - show_term_count=show_term_count, + show_counts=show_counts, ) def _build_result_object( @@ -602,7 +620,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: Q, format_type: str = "json", ) -> Result: @@ -612,7 +629,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, q_object=q_object, format_type=format_type, ) @@ -631,8 +647,7 @@ def run( include: Union[str, None] = None, format_type: str = "json", show_sql: bool = False, - show_count: bool = True, - show_term_count: bool = False, + show_counts: Optional[bool] = None, include_total_count: bool = True, ) -> Union[DryClass, Result, Paged_Result, None]: """ @@ -650,8 +665,7 @@ def run( include (Union[str, None], optional). Defaults to None. format_type (str, optional). Defaults to "json". show_sql (bool, optional). Defaults to False. - show_count (bool, optional). Defaults to True. - show_term_count (bool, optional). Defaults to False. + show_counts (bool, optional). Defaults to False. Returns: Union[QueryCreatedData, ApplyResult, Result, DryClass, None]: _description_ """ @@ -698,6 +712,12 @@ def run( if offset is None: offset = 0 + if show_counts is not None: + self._config.show_counts = show_counts + + if self.show_counts is not None: + show_counts = self.get_counts() + self._show_sql = show_sql or False try: @@ -717,7 +737,7 @@ def run( offset=offset, async_req=async_call, include_total_count=include_total_count, - show_term_count=show_term_count, + show_counts=show_counts, ) if isinstance(api_response, ApplyResult): if verbose: @@ -735,7 +755,6 @@ def run( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, q_object=self, format_type=format_type, ) diff --git a/cdapython/factories/booleanquery/boolean_query.py b/cdapython/factories/booleanquery/boolean_query.py index 9775d0b7..ea9d0d01 100644 --- a/cdapython/factories/booleanquery/boolean_query.py +++ b/cdapython/factories/booleanquery/boolean_query.py @@ -19,7 +19,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: Optional[bool], + show_counts: Optional[bool], ) -> PagedResponseData: """ Call the endpoint to start the job for data collection. diff --git a/cdapython/factories/count.py b/cdapython/factories/count.py index e6b8cf9c..b5c8dd17 100644 --- a/cdapython/factories/count.py +++ b/cdapython/factories/count.py @@ -27,7 +27,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: int, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.global_counts( query=self.query, @@ -42,7 +42,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: @@ -52,7 +51,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/factories/diagnosis/count.py b/cdapython/factories/diagnosis/count.py index 2a49e7ed..56d0f5b1 100644 --- a/cdapython/factories/diagnosis/count.py +++ b/cdapython/factories/diagnosis/count.py @@ -31,7 +31,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.diagnosis_counts_query( query=self.query, @@ -46,7 +46,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: @@ -56,7 +55,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/factories/diagnosis/diagnosis.py b/cdapython/factories/diagnosis/diagnosis.py index df6c3289..d36827b0 100644 --- a/cdapython/factories/diagnosis/diagnosis.py +++ b/cdapython/factories/diagnosis/diagnosis.py @@ -25,7 +25,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.diagnosis_query( query=self.query, diff --git a/cdapython/factories/file.py b/cdapython/factories/file.py index 784b6a70..82104203 100644 --- a/cdapython/factories/file.py +++ b/cdapython/factories/file.py @@ -25,7 +25,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.files( query=self.query, diff --git a/cdapython/factories/file_count.py b/cdapython/factories/file_count.py index 25469e82..e892edf5 100644 --- a/cdapython/factories/file_count.py +++ b/cdapython/factories/file_count.py @@ -23,7 +23,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.file_counts_query( query=self.query, @@ -38,7 +38,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: @@ -48,7 +47,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/factories/mutations/count.py b/cdapython/factories/mutations/count.py index 015fa5ad..cddbd051 100644 --- a/cdapython/factories/mutations/count.py +++ b/cdapython/factories/mutations/count.py @@ -27,7 +27,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.mutation_counts_query( query=self.query, @@ -42,18 +42,16 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: return CountResult( - api_response, - offset, - limit, - api_instance, - show_sql, - show_count, - format_type, + api_response=api_response, + offset=offset, + limit=limit, + api_instance=api_instance, + show_sql=show_sql, + format_type=format_type, ) class Factory(AbstractFactory): diff --git a/cdapython/factories/mutations/mutations.py b/cdapython/factories/mutations/mutations.py index a114b767..77d84efc 100644 --- a/cdapython/factories/mutations/mutations.py +++ b/cdapython/factories/mutations/mutations.py @@ -33,7 +33,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: """ This will call the mutation_query endpoint diff --git a/cdapython/factories/research_subject/count.py b/cdapython/factories/research_subject/count.py index 9b77f648..5e0f8920 100644 --- a/cdapython/factories/research_subject/count.py +++ b/cdapython/factories/research_subject/count.py @@ -28,7 +28,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.research_subject_counts_query( query=self.query, @@ -43,18 +43,16 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: return CountResult( - api_response, - offset, - limit, - api_instance, - show_sql, - show_count, - format_type, + api_response=api_response, + offset=offset, + limit=limit, + api_instance=api_instance, + show_sql=show_sql, + format_type=format_type, ) class Factory(AbstractFactory): diff --git a/cdapython/factories/research_subject/file.py b/cdapython/factories/research_subject/file.py index 84126424..e8e4b281 100644 --- a/cdapython/factories/research_subject/file.py +++ b/cdapython/factories/research_subject/file.py @@ -29,7 +29,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.research_subject_files_query( query=self.query, diff --git a/cdapython/factories/research_subject/file_count.py b/cdapython/factories/research_subject/file_count.py index 2e387fe0..44c3beaf 100644 --- a/cdapython/factories/research_subject/file_count.py +++ b/cdapython/factories/research_subject/file_count.py @@ -29,7 +29,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> QueryResponseData: return api_instance.research_subject_file_counts_query( query=self.query, @@ -44,18 +44,16 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: return CountResult( - api_response, - offset, - limit, - api_instance, - show_sql, - show_count, - format_type, + api_response=api_response, + offset=offset, + limit=limit, + api_instance=api_instance, + show_sql=show_sql, + format_type=format_type, ) class Factory(AbstractFactory): diff --git a/cdapython/factories/research_subject/research_subject.py b/cdapython/factories/research_subject/research_subject.py index f8afb55c..679fc46e 100644 --- a/cdapython/factories/research_subject/research_subject.py +++ b/cdapython/factories/research_subject/research_subject.py @@ -28,7 +28,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.research_subject_query( query=self.query, diff --git a/cdapython/factories/specimen/count.py b/cdapython/factories/specimen/count.py index 0bc271c3..8eab4a2c 100644 --- a/cdapython/factories/specimen/count.py +++ b/cdapython/factories/specimen/count.py @@ -27,7 +27,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.specimen_counts_query( query=self.query, @@ -42,7 +42,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: @@ -52,7 +51,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/factories/specimen/file.py b/cdapython/factories/specimen/file.py index a3e928e4..660e5790 100644 --- a/cdapython/factories/specimen/file.py +++ b/cdapython/factories/specimen/file.py @@ -44,7 +44,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.specimen_files_query( query=self.query, diff --git a/cdapython/factories/specimen/file_count.py b/cdapython/factories/specimen/file_count.py index e9bb6733..666d3d46 100644 --- a/cdapython/factories/specimen/file_count.py +++ b/cdapython/factories/specimen/file_count.py @@ -30,7 +30,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.specimen_file_counts_query( query=self.query, @@ -45,7 +45,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: @@ -55,7 +54,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/factories/specimen/specimen.py b/cdapython/factories/specimen/specimen.py index 25cfa01d..66d13221 100644 --- a/cdapython/factories/specimen/specimen.py +++ b/cdapython/factories/specimen/specimen.py @@ -28,7 +28,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.specimen_query( query=self.query, diff --git a/cdapython/factories/subject/count.py b/cdapython/factories/subject/count.py index cbca1518..cc87b1fa 100644 --- a/cdapython/factories/subject/count.py +++ b/cdapython/factories/subject/count.py @@ -28,7 +28,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.subject_counts_query( query=self.query, @@ -43,7 +43,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: @@ -53,7 +52,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/factories/subject/file.py b/cdapython/factories/subject/file.py index f4c30e70..88f9af7a 100644 --- a/cdapython/factories/subject/file.py +++ b/cdapython/factories/subject/file.py @@ -28,7 +28,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.subject_files_query( query=self.query, diff --git a/cdapython/factories/subject/file_count.py b/cdapython/factories/subject/file_count.py index 1db43ee8..f1f6ee9c 100644 --- a/cdapython/factories/subject/file_count.py +++ b/cdapython/factories/subject/file_count.py @@ -30,7 +30,7 @@ def _call_endpoint( offset: int, limit: int, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.subject_file_counts_query( query=self.query, @@ -45,7 +45,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: @@ -55,7 +54,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/factories/subject/subject.py b/cdapython/factories/subject/subject.py index c07f3fe1..50d8e35a 100644 --- a/cdapython/factories/subject/subject.py +++ b/cdapython/factories/subject/subject.py @@ -28,7 +28,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.subject_query( query=self.query, diff --git a/cdapython/factories/treatment/count.py b/cdapython/factories/treatment/count.py index b3f9526d..43181c47 100644 --- a/cdapython/factories/treatment/count.py +++ b/cdapython/factories/treatment/count.py @@ -22,7 +22,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.treatment_counts_query( query=self.query, @@ -37,7 +37,6 @@ def _build_result_object( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: "Q", format_type: str = "json", ) -> Result: @@ -47,7 +46,6 @@ def _build_result_object( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/factories/treatment/treatment.py b/cdapython/factories/treatment/treatment.py index 8c7dc8ff..cd0019ab 100644 --- a/cdapython/factories/treatment/treatment.py +++ b/cdapython/factories/treatment/treatment.py @@ -25,7 +25,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_term_count: bool, + show_counts: bool, ) -> Endpoint: return api_instance.treatments_query( query=self.query, diff --git a/cdapython/factories/unique_terms/unique_terms_endpoint.py b/cdapython/factories/unique_terms/unique_terms_endpoint.py index 8f25f8f7..eb2a6dcc 100644 --- a/cdapython/factories/unique_terms/unique_terms_endpoint.py +++ b/cdapython/factories/unique_terms/unique_terms_endpoint.py @@ -18,7 +18,7 @@ def _call_endpoint( async_req: bool, offset: int, limit: int, - show_term_count: Optional[bool], + show_counts: Optional[bool], include_total_count: bool, system: Optional[str] = "", ) -> Endpoint: @@ -27,7 +27,7 @@ def _call_endpoint( return api_instance.unique_values( body=self.query.value, system=system, - count=show_term_count, + count=show_counts, async_req=async_req, offset=offset, limit=limit, @@ -37,7 +37,7 @@ def _call_endpoint( return api_instance.unique_values( body=self.query.value, system=system, - count=show_term_count, + count=show_counts, async_req=async_req, offset=offset, limit=limit, diff --git a/cdapython/results/base.py b/cdapython/results/base.py index 3212b9b1..7af9986f 100644 --- a/cdapython/results/base.py +++ b/cdapython/results/base.py @@ -20,13 +20,11 @@ class BaseResult: def __init__( self, show_sql: bool, - show_count: bool, result: List[Any], format_type: str = "json", ) -> None: self._result: List[Any] = result self.show_sql: Optional[bool] = show_sql - self.show_count: Optional[bool] = show_count self.format_type: str = format_type self._df: DataFrame diff --git a/cdapython/results/columns_result.py b/cdapython/results/columns_result.py index ee8cb3d8..66492cdd 100644 --- a/cdapython/results/columns_result.py +++ b/cdapython/results/columns_result.py @@ -28,7 +28,6 @@ class ColumnsResult(BaseResult): def __init__( self, show_sql: bool, - show_count: bool, result: List[Any], description: bool = True, format_type: str = "json", @@ -38,7 +37,6 @@ def __init__( self._data_table: DataFrame super().__init__( show_sql=show_sql, - show_count=show_count, format_type=format_type, result=result, ) diff --git a/cdapython/results/factories/collect_result.py b/cdapython/results/factories/collect_result.py index 6549ff49..db802b43 100644 --- a/cdapython/results/factories/collect_result.py +++ b/cdapython/results/factories/collect_result.py @@ -29,7 +29,6 @@ def __init__( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, result: List[Any], format_type: str = "json", ) -> None: @@ -40,7 +39,6 @@ def __init__( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) @@ -109,7 +107,6 @@ def create(q_object: Result) -> CollectResult: limit=q_object._limit, api_instance=q_object._api_instance, show_sql=q_object.show_sql, - show_count=q_object.show_count, format_type="json", result=q_object._result, ) diff --git a/cdapython/results/factories/not_paginated_result.py b/cdapython/results/factories/not_paginated_result.py index c6036748..129bee8f 100644 --- a/cdapython/results/factories/not_paginated_result.py +++ b/cdapython/results/factories/not_paginated_result.py @@ -21,7 +21,6 @@ def __init__( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, format_type: str = "json", ) -> None: super().__init__( @@ -30,7 +29,6 @@ def __init__( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/results/page_result.py b/cdapython/results/page_result.py index ece0ac61..18195414 100644 --- a/cdapython/results/page_result.py +++ b/cdapython/results/page_result.py @@ -1,7 +1,7 @@ from __future__ import annotations from multiprocessing.pool import ApplyResult -from typing import TYPE_CHECKING, Any, Coroutine, List, Union, cast +from typing import TYPE_CHECKING, Any, Coroutine, List, Union, cast, Optional from urllib.parse import parse_qs, urlparse import anyio @@ -36,7 +36,6 @@ def __init__( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, q_object: Union[Q, None], format_type: str = "json", ) -> None: @@ -51,7 +50,6 @@ def __init__( api_instance=api_instance, api_response=api_response, show_sql=show_sql, - show_count=show_count, format_type=format_type, offset=offset, limit=limit, @@ -63,17 +61,18 @@ def _get_result( _limit: int, async_req: bool = False, include_total_count: bool = False, - show_term_count: bool = False, + show_counts: bool = False, ) -> Union[ApplyResult[Any], Paged_Result, Any, None]: if self.q_object: self.q_object: Q = self.q_object.set_verbose(False) + self.q_object: Q = self.q_object.set_counts(show_counts=show_counts) return self.q_object.set_config(config=self.q_object.get_config()).run( verbose=self.q_object.get_verbose(), offset=_offset, limit=_limit, async_call=async_req, include_total_count=include_total_count, - show_term_count=show_term_count, + show_counts=self.q_object.get_counts(), ) return None @@ -84,6 +83,7 @@ def paginator( to_list: bool = False, limit: int = 0, show_bar: bool = False, + show_counts: Optional[bool] = None, ) -> Paginator: """_summary_ paginator this will automatically page over results @@ -93,7 +93,8 @@ def paginator( Returns: _type_: _description_ """ - + if show_counts is None: + show_counts = self.q_object.get_counts() limit = limit if limit != 0 else self._limit return Paginator( @@ -104,6 +105,7 @@ def paginator( output=output, format_type=self.format_type, show_bar=show_bar, + show_counts=show_counts, ) def return_result( @@ -133,7 +135,7 @@ def get_all( show_bar: bool = True, to_df: bool = False, to_list: bool = False, - show_term_count: bool = False, + show_counts: bool = False, ) -> "CollectResult": """ This method will automatically paginate and concatenate results for you. @@ -143,7 +145,7 @@ def get_all( show_bar (bool, optional): _description_. Defaults to True. to_df (bool, optional): _description_. Defaults to False. to_list (bool, optional): _description_. Defaults to False. - show_term_count (bool, optional): _description_. Defaults to False. + show_counts (bool, optional): _description_. Defaults to False. Returns: CollectResult: _description_ @@ -162,7 +164,6 @@ def get_all( limit=self._limit, api_instance=self._api_instance, show_sql=self.show_sql, - show_count=self.show_count, q_object=self.q_object, ) @@ -174,7 +175,7 @@ def get_all( output=output, format_type=self.format_type, show_bar=show_bar, - show_term_count=show_term_count, + show_counts=show_counts, ) # add this to cast to a subclass of CollectResult @@ -223,7 +224,7 @@ async def async_prev_page( return anyio.to_thread.run_sync(self.prev_page, limit, async_req, pre_stream) def next_page( - self, limit: int = 100, show_term_count: bool = False + self, limit: int = 100, show_counts: Optional[bool] = None ) -> Union[ApplyResult[Any], Result, Paged_Result, None]: """ The next_page function will call the server for the next page using this \ @@ -237,7 +238,8 @@ def next_page( Returns: _type_: _description_ """ - + if show_counts is None: + show_counts = self.q_object.get_counts() if isinstance(self._offset, int) and isinstance(self._limit, int): if self._api_response["next_url"] is not None: self._limit = int( @@ -255,7 +257,7 @@ def next_page( _offset=self._offset, _limit=self._limit, include_total_count=True, - show_term_count=show_term_count, + show_counts=show_counts, ) if next_result.total_row_count is not None: diff --git a/cdapython/results/result.py b/cdapython/results/result.py index fa479ce1..c819dbe8 100644 --- a/cdapython/results/result.py +++ b/cdapython/results/result.py @@ -53,7 +53,6 @@ def __init__( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, format_type: str = "json", ) -> None: self._api_response: PagedResponseData = api_response @@ -64,7 +63,6 @@ def __init__( self._df: DataFrame super().__init__( show_sql=show_sql, - show_count=show_count, format_type=format_type, result=self._api_response.result, ) diff --git a/cdapython/results/string_result.py b/cdapython/results/string_result.py index 3d2bd41c..cf4d7ccd 100644 --- a/cdapython/results/string_result.py +++ b/cdapython/results/string_result.py @@ -23,7 +23,6 @@ def __init__( limit: int, api_instance: QueryApi, show_sql: bool, - show_count: bool, format_type: str = "json", ) -> None: super().__init__( @@ -32,7 +31,6 @@ def __init__( limit=limit, api_instance=api_instance, show_sql=show_sql, - show_count=show_count, format_type=format_type, ) diff --git a/cdapython/utils/Qconfig.py b/cdapython/utils/Qconfig.py index 8b14db37..20e0e99e 100644 --- a/cdapython/utils/Qconfig.py +++ b/cdapython/utils/Qconfig.py @@ -14,6 +14,7 @@ def __init__( version: Optional[str] = None, show_sql: bool = False, verbose: Optional[bool] = None, + show_counts: Optional[bool] = None, ) -> None: """ This class is made to keep Q's configuration settings to pass on to other Q class or methods. @@ -30,6 +31,9 @@ def __init__( self.version: str = Constants.table_version if version is None else version self.show_sql: bool = show_sql self.verbose: Union[bool, None] = verbose if verbose is None else verbose + self.show_counts: Union[bool, None] = ( + show_counts if show_counts is None else show_counts + ) def copy_config(self) -> Qconfig: """ diff --git a/cdapython/utils/utility.py b/cdapython/utils/utility.py index f91e9149..107cc95e 100644 --- a/cdapython/utils/utility.py +++ b/cdapython/utils/utility.py @@ -107,11 +107,10 @@ def unique_terms( async_req: Optional[bool] = True, show_sql: bool = False, show_counts: bool = False, - show_term_count: bool = False, verbose: bool = True, limit: int = 100, ) -> Paged_Result: - # TODO : Should the value be changed? The "show_term_count" parameter was copied from Swagger-generated code. Is this description sufficient? + # TODO : Should the value be changed? The "show_counts" parameter was copied from Swagger-generated code. Is this description sufficient? """ Show all unique terms for a given column. Args: @@ -122,8 +121,7 @@ def unique_terms( verify (Optional[bool], optional): This will send a request to the cda server without verifying the SSL Cert Verification. Defaults to None. async_req (Optional[bool], optional): Execute request asynchronously. Defaults to True. show_sql (bool, optional): This will show the sql returned from the server. Defaults to False. - show_counts (bool, optional): Currently, the functionality is nonfunctional and pending further investigation to determine if it is needed: This will show or hide the count. Defaults to False. - show_term_count (bool, optional): Show the number of occurrences for each value. Defaults to False. + show_counts (bool, optional): Show the number of occurrences for each value. Defaults to False. verbose (bool, optional): This will hide or show values that are automatic printed when Q runs. Defaults to True. limit (int, optional): the numbers of entries to return per page of data. Defaults to 100. @@ -142,8 +140,7 @@ def unique_terms( host=host, verify=verify, show_sql=show_sql, - show_count=show_counts, - show_term_count=show_term_count, + show_counts=show_counts, verbose=verbose, async_call=async_req, ) @@ -192,7 +189,6 @@ def columns( if "result" in api_response: query_result: ColumnsResult = ColumnsResult( show_sql=show_sql, - show_count=True, result=api_response["result"], description=description, ) @@ -200,7 +196,6 @@ def columns( else: query_result: ColumnsResult = ColumnsResult( show_sql=show_sql, - show_count=True, result=api_response, description=description, ) diff --git a/tests/Q_test_is.py b/tests/Q_test_is.py index 1078006b..dbe17b60 100644 --- a/tests/Q_test_is.py +++ b/tests/Q_test_is.py @@ -1338,7 +1338,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/async_strem_test.py b/tests/async_strem_test.py index bf015c80..a008e5d8 100644 --- a/tests/async_strem_test.py +++ b/tests/async_strem_test.py @@ -4,7 +4,7 @@ from cdapython import Q from cdapython.results.page_result import Paged_Result -from tests.global_settings import host, project +from tests.global_settings import host async def main() -> None: diff --git a/tests/drs_id.py b/tests/drs_id.py index 0e49450b..5da4bed7 100644 --- a/tests/drs_id.py +++ b/tests/drs_id.py @@ -1,9 +1,5 @@ -from global_settings import ( - integration_host, - integration_table, - localhost, - production_host, -) +from tests.global_settings import host + from cdapython import Q from cdapython.utils.utility import get_host_url @@ -12,9 +8,9 @@ def main(): # print(Q("sex = 'male'").to_json()) all_data = ( - Q("sex = REPLACE(REPLACE(sex,'fe',''), 'male', '' ) AND id = 1", lark=True) + Q("sex = REPLACE(REPLACE(sex,'fe',''), 'male', '' ) AND id = 1", debug=True) .SELECT("id,sex") - .set_host(localhost) + .set_host(host) .run() ) print(all_data) diff --git a/tests/fake_result.py b/tests/fake_result.py index 306ad6b1..7aca853f 100644 --- a/tests/fake_result.py +++ b/tests/fake_result.py @@ -34,10 +34,6 @@ def api_instance(self) -> QueryApi: def show_sql(self) -> Literal[False]: return False - @property - def show_count(self) -> bool: - return False - @property def format_type(self) -> str: return "json" diff --git a/tests/filter_test_error.py b/tests/filter_test_error.py index 85588c04..cc9dd152 100644 --- a/tests/filter_test_error.py +++ b/tests/filter_test_error.py @@ -10,7 +10,7 @@ # print(df) -columns().to_dataframe(include="Column_Name:specimen_Files") +columns().to_dataframe() Q("GDC_FILTER = 'NonExonic;bitgt'").mutation.ORDER_BY("sex:-1").run( diff --git a/tests/jay_error.py b/tests/jay_error.py index 6e817254..08582875 100644 --- a/tests/jay_error.py +++ b/tests/jay_error.py @@ -16,7 +16,7 @@ b = Q( "identifier_system = 'IDC' FROM researchSubject_identifier_system = 'PDC' FROM researchSubject_identifier_system = 'GDC'", - lark=True, + debug=True, ).to_json() print("lark") with open("lark.json", "w") as f: diff --git a/tests/long_query_in.py b/tests/long_query_in.py index 59b64ba9..914e23c6 100644 --- a/tests/long_query_in.py +++ b/tests/long_query_in.py @@ -44,7 +44,7 @@ ) s = Q("sex = 'male'").run() df = pandas.DataFrame() -p = s.get_all(to_df=True, limit=10000, host_df=df) +p = s.get_all(to_df=True, limit=10000) print(p) diff --git a/tests/supermock.py b/tests/supermock.py index 5667d2d2..ecb67ca9 100644 --- a/tests/supermock.py +++ b/tests/supermock.py @@ -34,7 +34,6 @@ def _patch(*args, **kwargs): limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) if self.result_type.CountResult == "CountResult": @@ -44,7 +43,6 @@ def _patch(*args, **kwargs): limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) return func(*args, **kwargs) diff --git a/tests/test_C_integration.py b/tests/test_C_integration.py index 188bf820..f5590945 100644 --- a/tests/test_C_integration.py +++ b/tests/test_C_integration.py @@ -10,7 +10,7 @@ def test_basic_integration() -> None: def test_unique_terms() -> None: - terms = unique_terms(col_name="sex", system="GDC", host=host, show_term_count=True) + terms = unique_terms(col_name="sex", system="GDC", host=host, show_counts=True) list_terms = terms.to_list() flat_terms = [list(i.values())[0] for i in list_terms] assert "female" in flat_terms diff --git a/tests/test_D_api_call.py b/tests/test_D_api_call.py index f3c769e7..298a0312 100644 --- a/tests/test_D_api_call.py +++ b/tests/test_D_api_call.py @@ -35,7 +35,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_E_ssl.py b/tests/test_E_ssl.py index fe51e7a8..df3c67b7 100644 --- a/tests/test_E_ssl.py +++ b/tests/test_E_ssl.py @@ -34,7 +34,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_Q_count.py b/tests/test_Q_count.py index 15ba43b7..16d59b90 100644 --- a/tests/test_Q_count.py +++ b/tests/test_Q_count.py @@ -23,7 +23,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_Q_not_like.py b/tests/test_Q_not_like.py index f7bb6503..639a3847 100644 --- a/tests/test_Q_not_like.py +++ b/tests/test_Q_not_like.py @@ -69,7 +69,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_age_at_collection.py b/tests/test_age_at_collection.py index a1709332..9e599d62 100644 --- a/tests/test_age_at_collection.py +++ b/tests/test_age_at_collection.py @@ -1520,7 +1520,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_bigDownTheHouse.py b/tests/test_bigDownTheHouse.py index 4a36696f..23d27111 100644 --- a/tests/test_bigDownTheHouse.py +++ b/tests/test_bigDownTheHouse.py @@ -2119,7 +2119,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_checking_IN.py b/tests/test_checking_IN.py index 61822892..74a9c1ee 100644 --- a/tests/test_checking_IN.py +++ b/tests/test_checking_IN.py @@ -66,7 +66,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_checking_IN_query_op.py b/tests/test_checking_IN_query_op.py index 2c91077f..909beb12 100644 --- a/tests/test_checking_IN_query_op.py +++ b/tests/test_checking_IN_query_op.py @@ -34,7 +34,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_donvan_error.py b/tests/test_donvan_error.py index fe6c80c8..5d51338b 100644 --- a/tests/test_donvan_error.py +++ b/tests/test_donvan_error.py @@ -42,7 +42,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_get_all_reset_CD-636.py b/tests/test_get_all_reset_CD-636.py index ea9c8024..0faa299e 100644 --- a/tests/test_get_all_reset_CD-636.py +++ b/tests/test_get_all_reset_CD-636.py @@ -27,15 +27,15 @@ def test_data_type_get_all(): assert all_data_type_copy == all_data_type -def test_data_type_show_term_count(): - data_type = unique_terms("data_type", host=host).get_all(show_term_count=True) +def test_data_type_show_counts(): + data_type = unique_terms("data_type", host=host).get_all(show_counts=True) assert "count" in data_type.to_dataframe() @pytest.mark.skip(reason="CD-650 Backend always returns counts") -def test_data_type_show_term_count_false(): +def test_data_type_show_counts_false(): with pytest.raises(KeyError): - data_type = unique_terms("data_type", host=host).get_all(show_term_count=False) + data_type = unique_terms("data_type", host=host).get_all(show_counts=False) df = data_type.to_dataframe()["count"] assert df is None diff --git a/tests/test_global_counts.py b/tests/test_global_counts.py index 34dfc0cb..01ceb60f 100644 --- a/tests/test_global_counts.py +++ b/tests/test_global_counts.py @@ -25,7 +25,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_in_pare.py b/tests/test_in_pare.py index 4831b246..c9f2b42a 100644 --- a/tests/test_in_pare.py +++ b/tests/test_in_pare.py @@ -1342,7 +1342,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_kidney.py b/tests/test_kidney.py index 6d577e1e..ff5d68ff 100644 --- a/tests/test_kidney.py +++ b/tests/test_kidney.py @@ -6977,7 +6977,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_ssl_cert_checks.py b/tests/test_ssl_cert_checks.py index 33fc6321..344be880 100644 --- a/tests/test_ssl_cert_checks.py +++ b/tests/test_ssl_cert_checks.py @@ -35,7 +35,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/test_unique_terms_page_bug.py b/tests/test_unique_terms_page_bug.py index 3eb15d20..fe60eeb1 100644 --- a/tests/test_unique_terms_page_bug.py +++ b/tests/test_unique_terms_page_bug.py @@ -1,12 +1,61 @@ +""" +Testing unique_term pagination using paginator and get_all +""" from cdapython import unique_terms -from tests.global_settings import host, project +from tests.global_settings import host + + +# TODO debug paginator not returning show_counts with unique_terms def test_unique_paginator(): projlist = [] - for i in unique_terms( - col_name="sex", - host=host, - ).paginator(to_list=True): + terms = unique_terms(col_name="sex", host=host, show_counts=True) + for list_of_terms in terms.paginator(to_list=True): + assert all("count" in term_dict for term_dict in list_of_terms) is True + projlist.extend(list_of_terms) + + assert len(projlist) > 0 + assert any("count" in d for d in projlist) is True + + +def test_unique_paginator_show_count_false(): + projlist = [] + terms = unique_terms(col_name="sex", host=host, show_counts=False) + for i in terms.paginator(to_list=True): + projlist.extend(i) + + assert len(projlist) > 0 + assert any("count" in d for d in projlist) is False + + +def test_unique_paginator_show_count_unset(): + projlist = [] + terms = unique_terms(col_name="sex", host=host) + for i in terms.paginator(to_list=True): projlist.extend(i) + assert len(projlist) > 0 + assert any("count" in d for d in projlist) is False + + +def test_unique_list(): + projlist = ( + unique_terms("primary_diagnosis_site").get_all(show_counts=True).to_dataframe() + ) + print(projlist) + assert any("count" in d for d in projlist) is True + + +def test_unique_list_turn_off_show_counts(): + projlist = ( + unique_terms("primary_diagnosis_site").get_all(show_counts=False).to_dataframe() + ) + print(projlist) + assert all("count" in d for d in projlist) is False + + +def test_unique_list_unset_show_counts(): + projlist = unique_terms("primary_diagnosis_site").get_all().to_dataframe() + print(projlist) + assert all("count" in d for d in projlist) is False diff --git a/tests/test_unqine_terms_get_all.py b/tests/test_unqine_terms_get_all.py index 2e60e89b..84c68327 100644 --- a/tests/test_unqine_terms_get_all.py +++ b/tests/test_unqine_terms_get_all.py @@ -27,7 +27,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/testing_researchsubject_count.py b/tests/testing_researchsubject_count.py index 57bd11ab..2062e934 100644 --- a/tests/testing_researchsubject_count.py +++ b/tests/testing_researchsubject_count.py @@ -24,7 +24,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) diff --git a/tests/testing_subject_count_copy.py b/tests/testing_subject_count_copy.py index 5b945fe8..b65220b3 100644 --- a/tests/testing_subject_count_copy.py +++ b/tests/testing_subject_count_copy.py @@ -24,7 +24,6 @@ limit=fake.limit, api_instance=fake.api_instance, show_sql=fake.show_sql, - show_count=fake.show_count, format_type=fake.format_type, ) From f7bcdf376c48e2c549eb80a376905d225e9d7cd5 Mon Sep 17 00:00:00 2001 From: Dion Boles Date: Mon, 27 Nov 2023 16:39:24 -0500 Subject: [PATCH 03/13] removed TODO from test and updated host in test --- ..._demonstrate_using_pandas_outside_of_cda_python.py | 11 +++++++---- tests/test_unique_terms_page_bug.py | 3 --- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_demonstrate_using_pandas_outside_of_cda_python.py b/tests/test_demonstrate_using_pandas_outside_of_cda_python.py index 649aeaa4..78f04bf9 100644 --- a/tests/test_demonstrate_using_pandas_outside_of_cda_python.py +++ b/tests/test_demonstrate_using_pandas_outside_of_cda_python.py @@ -1,19 +1,22 @@ from pandas import json_normalize from pandas.testing import assert_frame_equal from cdapython import unique_terms, Q +from tests.global_settings import host def test_demonstrate_list_to_df_with_search_replacement(): # new way df = json_normalize( - data=unique_terms(col_name="primary_diagnosis_site", show_counts=True).to_list() + data=unique_terms( + col_name="primary_diagnosis_site", show_counts=True, host=host + ).to_list() ) new_way = df.loc[ df["primary_diagnosis_site"].str.contains("gland", case=False, na=False) ].reset_index(drop=True) # old way - old_way = unique_terms("primary_diagnosis_site", show_counts=True).to_dataframe( - search_fields="primary_diagnosis_site", search_value="gland" - ) + old_way = unique_terms( + "primary_diagnosis_site", show_counts=True, host=host + ).to_dataframe(search_fields="primary_diagnosis_site", search_value="gland") assert_frame_equal(new_way, old_way) diff --git a/tests/test_unique_terms_page_bug.py b/tests/test_unique_terms_page_bug.py index fe60eeb1..fc09be16 100644 --- a/tests/test_unique_terms_page_bug.py +++ b/tests/test_unique_terms_page_bug.py @@ -5,9 +5,6 @@ from tests.global_settings import host -# TODO debug paginator not returning show_counts with unique_terms - - def test_unique_paginator(): projlist = [] terms = unique_terms(col_name="sex", host=host, show_counts=True) From bb1df5a2bfec6b96c8c3b1ecd237bd7e82059166 Mon Sep 17 00:00:00 2001 From: Dion Boles Date: Tue, 28 Nov 2023 11:06:09 -0500 Subject: [PATCH 04/13] updated docstring and removed TODO from code --- cdapython/Q.py | 31 +++++++++---------- .../factories/booleanquery/boolean_query.py | 5 ++- cdapython/results/page_result.py | 2 +- cdapython/utils/utility.py | 1 - 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/cdapython/Q.py b/cdapython/Q.py index bdbc7f95..5dceb34e 100644 --- a/cdapython/Q.py +++ b/cdapython/Q.py @@ -307,10 +307,9 @@ def get_counts(self) -> bool: def set_counts(self, show_counts: bool) -> Q: """ - this will set the private propey _verbose + This will set the private property _verbose Args: value (bool) - Returns: Q """ @@ -321,7 +320,7 @@ def set_counts(self, show_counts: bool) -> Q: def set_verbose(self, value: bool) -> Q: """ - this will set the private propey _verbose + This will set the private property _verbose Args: value (bool) @@ -511,7 +510,7 @@ def bigquery_status( @property def file(self) -> Q: """_summary_ - this is a chaining method used to get files + This is a chaining method used to get files Returns: _type_ """ @@ -520,7 +519,7 @@ def file(self) -> Q: @property def count(self) -> Q: """_summary_ - this is a chaining method used to get counts + This is a chaining method used to get counts Returns: _type_ """ @@ -597,7 +596,7 @@ def _call_endpoint( limit (int): _description_ async_req (bool): _description_ include_total_count (bool): _description_ - show_counts (Optional[bool]): _description_ + show_counts (Optional[bool]): Show the number of occurrences for each value Returns: PagedResponseData: _description_ @@ -653,22 +652,22 @@ def run( """ This will call the server to make a request return a Result like object Args: - offset (int, optional). Defaults to None. - limit (int, optional). Defaults to None. - version (Union[str, None], optional). Defaults to None. - host (Union[str, None], optional). Defaults to None. + offset (int, optional) The number of entries to skip. Defaults to None. + limit (int, optional) The numbers of entries to return per page of data. Defaults to None. + host (Union[str, None], optional) This is where the user can set a host for a different server. Defaults to None. dry_run (bool, optional). Defaults to False. - table (Union[str, None], optional). Defaults to None. - async_call (bool, optional). Defaults to False. - verify (Union[bool, None], optional). Defaults to None. - verbose (bool, optional). Defaults to True. + async_call (bool, optional) Execute request asynchronously. Defaults to False. + verify (Union[bool, None] This will send a request to the cda server without verifying the SSL Cert Verification, optional). Defaults to None. + verbose (bool, optional) This will hide or show values that are automatic printed when Q runs. Defaults to True. include (Union[str, None], optional). Defaults to None. format_type (str, optional). Defaults to "json". - show_sql (bool, optional). Defaults to False. - show_counts (bool, optional). Defaults to False. + show_sql (bool, optional) This will show the sql returned from the server. Defaults to False. + show_counts (bool, optional) Show the number of occurrences for each value. Defaults to False. + include_total_count bool This will return add a param to the request to the server Returns: Union[QueryCreatedData, ApplyResult, Result, DryClass, None]: _description_ """ + dry_run_current = False if host is None: diff --git a/cdapython/factories/booleanquery/boolean_query.py b/cdapython/factories/booleanquery/boolean_query.py index ea9d0d01..4c31fe8a 100644 --- a/cdapython/factories/booleanquery/boolean_query.py +++ b/cdapython/factories/booleanquery/boolean_query.py @@ -19,10 +19,9 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, - show_counts: Optional[bool], ) -> PagedResponseData: """ - Call the endpoint to start the job for data collection. + Args: api_instance (QueryApi): _description_ dry_run (bool): _description_ @@ -30,7 +29,7 @@ def _call_endpoint( limit (int): _description_ async_req (bool): _description_ include_total_count (bool): _description_ - show_term_frequency (Optional[bool]): _description_ + show_counts (Optional[bool]): Show the number of occurrences for each value Returns: PagedResponseData: _description_ diff --git a/cdapython/results/page_result.py b/cdapython/results/page_result.py index 18195414..146f8c2d 100644 --- a/cdapython/results/page_result.py +++ b/cdapython/results/page_result.py @@ -145,7 +145,7 @@ def get_all( show_bar (bool, optional): _description_. Defaults to True. to_df (bool, optional): _description_. Defaults to False. to_list (bool, optional): _description_. Defaults to False. - show_counts (bool, optional): _description_. Defaults to False. + show_counts (bool, optional): Show the number of occurrences for each value. Defaults to False. Returns: CollectResult: _description_ diff --git a/cdapython/utils/utility.py b/cdapython/utils/utility.py index 107cc95e..51e75844 100644 --- a/cdapython/utils/utility.py +++ b/cdapython/utils/utility.py @@ -110,7 +110,6 @@ def unique_terms( verbose: bool = True, limit: int = 100, ) -> Paged_Result: - # TODO : Should the value be changed? The "show_counts" parameter was copied from Swagger-generated code. Is this description sufficient? """ Show all unique terms for a given column. Args: From 930e1291a897b611e3a78e58fd18f8fdf034c9f4 Mon Sep 17 00:00:00 2001 From: Dion Boles Date: Tue, 28 Nov 2023 11:25:23 -0500 Subject: [PATCH 05/13] factory boolanquery was missing a show_counts --- cdapython/factories/booleanquery/boolean_query.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cdapython/factories/booleanquery/boolean_query.py b/cdapython/factories/booleanquery/boolean_query.py index 4c31fe8a..ee5da385 100644 --- a/cdapython/factories/booleanquery/boolean_query.py +++ b/cdapython/factories/booleanquery/boolean_query.py @@ -19,6 +19,7 @@ def _call_endpoint( limit: int, async_req: bool, include_total_count: bool, + show_counts: Optional[bool], ) -> PagedResponseData: """ From 8283f1c4518c8d884dcb3e7f0d5762f885344ace Mon Sep 17 00:00:00 2001 From: Dion Boles Date: Tue, 28 Nov 2023 13:42:36 -0500 Subject: [PATCH 06/13] updated docstring to run test --- cdapython/factories/booleanquery/boolean_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdapython/factories/booleanquery/boolean_query.py b/cdapython/factories/booleanquery/boolean_query.py index ee5da385..6a61e8b9 100644 --- a/cdapython/factories/booleanquery/boolean_query.py +++ b/cdapython/factories/booleanquery/boolean_query.py @@ -26,7 +26,7 @@ def _call_endpoint( Args: api_instance (QueryApi): _description_ dry_run (bool): _description_ - offset (int): _description_ + offset (int) The number of entries to skip.: limit (int): _description_ async_req (bool): _description_ include_total_count (bool): _description_ From 4befe6105d7f0f6f03e19614f36ad703d986a95e Mon Sep 17 00:00:00 2001 From: abradyGDIT Date: Tue, 5 Dec 2023 16:04:09 -0500 Subject: [PATCH 07/13] Tweaking requirements to use compatible cda_client --- requirements.txt | 2 +- requirements.txt.bak | 99 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 requirements.txt.bak diff --git a/requirements.txt b/requirements.txt index 3943b3c4..2befd911 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ attrs==23.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" backcall==0.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" beautifulsoup4==4.12.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" bleach==6.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -cda-client @ git+https://github.com/CancerDataAggregator/cda-service-python-client.git@3.3.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +cda-client @ git+https://github.com/CancerDataAggregator/cda-service-python-client.git@sql_update ; python_version >= "3.8" and python_full_version <= "3.11.0" certifi==2022.12.7 ; python_version >= "3.8" and python_full_version <= "3.11.0" cffi==1.16.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" charset-normalizer==3.3.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" diff --git a/requirements.txt.bak b/requirements.txt.bak new file mode 100644 index 00000000..3943b3c4 --- /dev/null +++ b/requirements.txt.bak @@ -0,0 +1,99 @@ +anyio==3.7.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +appnope==0.1.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" and sys_platform == "darwin" or python_version >= "3.8" and python_full_version <= "3.11.0" and platform_system == "Darwin" +argon2-cffi-bindings==21.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +argon2-cffi==23.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +asttokens==2.4.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +attrs==23.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +backcall==0.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +beautifulsoup4==4.12.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +bleach==6.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +cda-client @ git+https://github.com/CancerDataAggregator/cda-service-python-client.git@3.3.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +certifi==2022.12.7 ; python_version >= "3.8" and python_full_version <= "3.11.0" +cffi==1.16.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +charset-normalizer==3.3.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +colorama==0.4.6 ; python_version >= "3.8" and python_full_version <= "3.11.0" and sys_platform == "win32" +comm==0.1.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" +contourpy==1.1.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +cycler==0.12.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +debugpy==1.8.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +decorator==5.1.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +defusedxml==0.7.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +entrypoints==0.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" +exceptiongroup==1.1.3 ; python_version >= "3.8" and python_version < "3.11" +executing==2.0.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +fastjsonschema==2.16.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +fonttools==4.43.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +idna==3.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" +importlib-metadata==6.8.0 ; python_version >= "3.8" and python_version < "3.10" +importlib-resources==6.1.0 ; python_version >= "3.8" and python_version < "3.9" +ipykernel==6.25.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +ipython-genutils==0.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +ipython==8.12.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" +ipywidgets==7.6.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" +itables==1.6.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +jedi==0.19.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +jinja2==3.1.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +jsonschema-specifications==2023.7.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +jsonschema==4.19.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +jupyter-client==8.3.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +jupyter-core==4.12.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +jupyterlab-pygments==0.2.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +jupyterlab-widgets==3.0.9 ; python_version >= "3.8" and python_full_version <= "3.11.0" +kiwisolver==1.4.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" +lark==1.1.7 ; python_version >= "3.8" and python_full_version <= "3.11.0" +lxml==4.9.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" +markdown-it-py==2.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +markupsafe==2.1.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" +matplotlib-inline==0.1.6 ; python_version >= "3.8" and python_full_version <= "3.11.0" +matplotlib==3.6.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +mdurl==0.1.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +mistune==0.8.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" +nbclient==0.8.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +nbconvert==6.5.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +nbformat==5.9.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +nest-asyncio==1.5.8 ; python_version >= "3.8" and python_full_version <= "3.11.0" +notebook==6.4.12 ; python_version >= "3.8" and python_full_version <= "3.11.0" +numpy==1.24.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" +packaging==21.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pandas==1.4.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pandocfilters==1.5.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +parso==0.8.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pexpect==4.8.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" and sys_platform != "win32" +pickleshare==0.7.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pillow==9.3.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pkgutil-resolve-name==1.3.10 ; python_version >= "3.8" and python_version < "3.9" +prometheus-client==0.17.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +prompt-toolkit==3.0.39 ; python_version >= "3.8" and python_full_version <= "3.11.0" +psutil==5.9.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" +ptyprocess==0.7.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" and os_name != "nt" or python_version >= "3.8" and python_full_version <= "3.11.0" and sys_platform != "win32" +pure-eval==0.2.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pycparser==2.21 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pygments==2.16.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pyparsing==3.1.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +python-dotenv==0.18.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pytz==2023.3.post1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.8" and python_full_version <= "3.11.0" +pywinpty==2.0.12 ; python_version >= "3.8" and python_full_version <= "3.11.0" and os_name == "nt" +pyzmq==25.0.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +referencing==0.30.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +regex==2022.10.31 ; python_version >= "3.8" and python_full_version <= "3.11.0" +requests==2.31.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +rich==13.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +rpds-py==0.10.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" +send2trash==1.8.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +six==1.16.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +sniffio==1.3.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +soupsieve==2.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" +stack-data==0.6.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" +terminado==0.17.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +tinycss2==1.2.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +tornado==6.3.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" +traitlets==5.11.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +typing-extensions==4.4.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" +urllib3==1.26.8 ; python_version >= "3.8" and python_full_version <= "3.11.0" +wcwidth==0.2.8 ; python_version >= "3.8" and python_full_version <= "3.11.0" +webencodings==0.5.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" +wheel==0.38.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" +widgetsnbextension==3.5.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" +zipp==3.17.0 ; python_version >= "3.8" and python_version < "3.10" From 2789efcea5a09586b8e4acc3dbecacc32aed9f3b Mon Sep 17 00:00:00 2001 From: abradyGDIT Date: Tue, 5 Dec 2023 16:23:03 -0500 Subject: [PATCH 08/13] Updating pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ed58b984..96680a8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ notebook = "6.4.12" fastjsonschema = "2.16.2" jupyter-core = "4.12" pyzmq = "25" -cda-client = {git = "https://github.com/CancerDataAggregator/cda-service-python-client.git", rev = "3.3.2"} +cda-client = {git = "https://github.com/CancerDataAggregator/cda-service-python-client.git", rev = "sql_update"} lark = "^1.1.5" regex = "^2022.10.31" tornado = "^6.3.2" From 69e3e8a84dac56c785e0906b6ac1f52c30010bf6 Mon Sep 17 00:00:00 2001 From: abradyGDIT Date: Wed, 6 Dec 2023 10:31:17 -0500 Subject: [PATCH 09/13] Removing stale file --- requirements.txt.bak | 99 -------------------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 requirements.txt.bak diff --git a/requirements.txt.bak b/requirements.txt.bak deleted file mode 100644 index 3943b3c4..00000000 --- a/requirements.txt.bak +++ /dev/null @@ -1,99 +0,0 @@ -anyio==3.7.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -appnope==0.1.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" and sys_platform == "darwin" or python_version >= "3.8" and python_full_version <= "3.11.0" and platform_system == "Darwin" -argon2-cffi-bindings==21.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -argon2-cffi==23.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -asttokens==2.4.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -attrs==23.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -backcall==0.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -beautifulsoup4==4.12.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -bleach==6.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -cda-client @ git+https://github.com/CancerDataAggregator/cda-service-python-client.git@3.3.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -certifi==2022.12.7 ; python_version >= "3.8" and python_full_version <= "3.11.0" -cffi==1.16.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -charset-normalizer==3.3.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -colorama==0.4.6 ; python_version >= "3.8" and python_full_version <= "3.11.0" and sys_platform == "win32" -comm==0.1.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" -contourpy==1.1.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -cycler==0.12.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -debugpy==1.8.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -decorator==5.1.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -defusedxml==0.7.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -entrypoints==0.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" -exceptiongroup==1.1.3 ; python_version >= "3.8" and python_version < "3.11" -executing==2.0.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -fastjsonschema==2.16.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -fonttools==4.43.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -idna==3.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" -importlib-metadata==6.8.0 ; python_version >= "3.8" and python_version < "3.10" -importlib-resources==6.1.0 ; python_version >= "3.8" and python_version < "3.9" -ipykernel==6.25.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -ipython-genutils==0.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -ipython==8.12.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" -ipywidgets==7.6.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" -itables==1.6.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -jedi==0.19.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -jinja2==3.1.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -jsonschema-specifications==2023.7.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -jsonschema==4.19.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -jupyter-client==8.3.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -jupyter-core==4.12.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -jupyterlab-pygments==0.2.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -jupyterlab-widgets==3.0.9 ; python_version >= "3.8" and python_full_version <= "3.11.0" -kiwisolver==1.4.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" -lark==1.1.7 ; python_version >= "3.8" and python_full_version <= "3.11.0" -lxml==4.9.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" -markdown-it-py==2.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -markupsafe==2.1.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" -matplotlib-inline==0.1.6 ; python_version >= "3.8" and python_full_version <= "3.11.0" -matplotlib==3.6.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -mdurl==0.1.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -mistune==0.8.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" -nbclient==0.8.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -nbconvert==6.5.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -nbformat==5.9.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -nest-asyncio==1.5.8 ; python_version >= "3.8" and python_full_version <= "3.11.0" -notebook==6.4.12 ; python_version >= "3.8" and python_full_version <= "3.11.0" -numpy==1.24.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" -packaging==21.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pandas==1.4.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pandocfilters==1.5.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -parso==0.8.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pexpect==4.8.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" and sys_platform != "win32" -pickleshare==0.7.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pillow==9.3.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pkgutil-resolve-name==1.3.10 ; python_version >= "3.8" and python_version < "3.9" -prometheus-client==0.17.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -prompt-toolkit==3.0.39 ; python_version >= "3.8" and python_full_version <= "3.11.0" -psutil==5.9.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" -ptyprocess==0.7.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" and os_name != "nt" or python_version >= "3.8" and python_full_version <= "3.11.0" and sys_platform != "win32" -pure-eval==0.2.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pycparser==2.21 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pygments==2.16.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pyparsing==3.1.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -python-dateutil==2.8.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -python-dotenv==0.18.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pytz==2023.3.post1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.8" and python_full_version <= "3.11.0" -pywinpty==2.0.12 ; python_version >= "3.8" and python_full_version <= "3.11.0" and os_name == "nt" -pyzmq==25.0.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -referencing==0.30.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -regex==2022.10.31 ; python_version >= "3.8" and python_full_version <= "3.11.0" -requests==2.31.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -rich==13.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -rpds-py==0.10.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" -send2trash==1.8.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -six==1.16.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -sniffio==1.3.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -soupsieve==2.5 ; python_version >= "3.8" and python_full_version <= "3.11.0" -stack-data==0.6.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" -terminado==0.17.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -tinycss2==1.2.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -tornado==6.3.3 ; python_version >= "3.8" and python_full_version <= "3.11.0" -traitlets==5.11.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -typing-extensions==4.4.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -urllib3==1.26.8 ; python_version >= "3.8" and python_full_version <= "3.11.0" -wcwidth==0.2.8 ; python_version >= "3.8" and python_full_version <= "3.11.0" -webencodings==0.5.1 ; python_version >= "3.8" and python_full_version <= "3.11.0" -wheel==0.38.4 ; python_version >= "3.8" and python_full_version <= "3.11.0" -widgetsnbextension==3.5.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" -zipp==3.17.0 ; python_version >= "3.8" and python_version < "3.10" From 73a3f9a399da8f7bfff3c731bbe2e1d1ac790a45 Mon Sep 17 00:00:00 2001 From: abradyGDIT Date: Wed, 6 Dec 2023 10:44:27 -0500 Subject: [PATCH 10/13] Updating requirements directives for compatible cda-service-python-client installation --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 96680a8f..f67f9831 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ notebook = "6.4.12" fastjsonschema = "2.16.2" jupyter-core = "4.12" pyzmq = "25" -cda-client = {git = "https://github.com/CancerDataAggregator/cda-service-python-client.git", rev = "sql_update"} +cda-client = {git = "https://github.com/CancerDataAggregator/cda-service-python-client.git", rev = "develop"} lark = "^1.1.5" regex = "^2022.10.31" tornado = "^6.3.2" diff --git a/requirements.txt b/requirements.txt index 2befd911..f1beba32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ attrs==23.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" backcall==0.2.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" beautifulsoup4==4.12.2 ; python_version >= "3.8" and python_full_version <= "3.11.0" bleach==6.1.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" -cda-client @ git+https://github.com/CancerDataAggregator/cda-service-python-client.git@sql_update ; python_version >= "3.8" and python_full_version <= "3.11.0" +cda-client @ git+https://github.com/CancerDataAggregator/cda-service-python-client.git@develop ; python_version >= "3.8" and python_full_version <= "3.11.0" certifi==2022.12.7 ; python_version >= "3.8" and python_full_version <= "3.11.0" cffi==1.16.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" charset-normalizer==3.3.0 ; python_version >= "3.8" and python_full_version <= "3.11.0" From beadc5f48514e40691e0c0942d34a086ab5bc12d Mon Sep 17 00:00:00 2001 From: abradyGDIT Date: Wed, 6 Dec 2023 11:01:54 -0500 Subject: [PATCH 11/13] Adding missing __init__.py files --- cdapython/factories/booleanquery/__init__.py | 0 cdapython/factories/unique_terms/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 cdapython/factories/booleanquery/__init__.py create mode 100644 cdapython/factories/unique_terms/__init__.py diff --git a/cdapython/factories/booleanquery/__init__.py b/cdapython/factories/booleanquery/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cdapython/factories/unique_terms/__init__.py b/cdapython/factories/unique_terms/__init__.py new file mode 100644 index 00000000..e69de29b From bb442c697c45389f836a3dcd1892b2b634ab5d08 Mon Sep 17 00:00:00 2001 From: Charbonneau Date: Thu, 7 Dec 2023 11:40:53 -0500 Subject: [PATCH 12/13] removing stale files and fixing gitignore --- .DS_Store | Bin 14340 -> 0 bytes .gitignore | 84 +- DataSummaries.ipynb | 1754 ------------------------------- Untitled-2.ipynb | 155 --- Untitled.ipynb | 389 ------- Untitled1.ipynb | 457 -------- mutation_tests (1).ipynb | 410 -------- notebooks/.DS_Store | Bin 6148 -> 0 bytes notebooks/BuildingACohort.ipynb | 1679 ----------------------------- notebooks/Untitled.ipynb | 1371 ------------------------ notebooks/Untitled1.ipynb | 411 -------- notebooks/example-tester.ipynb | 1120 -------------------- notebooks/example.ipynb | 825 --------------- notebooks/testa.ipynb | 131 --- notebooks/testing_sql.ipynb | 125 --- 15 files changed, 46 insertions(+), 8865 deletions(-) delete mode 100644 .DS_Store delete mode 100644 DataSummaries.ipynb delete mode 100644 Untitled-2.ipynb delete mode 100644 Untitled.ipynb delete mode 100644 Untitled1.ipynb delete mode 100644 mutation_tests (1).ipynb delete mode 100644 notebooks/.DS_Store delete mode 100644 notebooks/BuildingACohort.ipynb delete mode 100644 notebooks/Untitled.ipynb delete mode 100644 notebooks/Untitled1.ipynb delete mode 100644 notebooks/example-tester.ipynb delete mode 100644 notebooks/example.ipynb delete mode 100644 notebooks/testa.ipynb delete mode 100644 notebooks/testing_sql.ipynb diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index c3b0821acaed85f1b20fb97bba3f46ab7df9127b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14340 zcmeHN3v?9a6~1=}Ff&VF62fM8$zw@?NQg<6gpdG%B@ZCs5hMxWp~g*ig{_<2xU)&b zh*Ep1t*=&Ft<|cvt+raVzOWu2E!O%xJ$gzbg0GGSxwY){%IPz}Iq?8q=Qc{OQSKHcA4+5RP%6bxG$;m&;Z*Nt-Kq2%OQp4(JtS`j}pU^a>=e0K@(i+BSQ_5fv{iaV~qw`krf&0v8y){8t4mCOIEKh)H^_(11mzI z_TZTTeRC{uMvR?p)g#eZBk#C5NMl)Dx0cPuxAn(@k+8lg5Y?$vl-s7v$S;~ZzjEQ? zXNK?VUqGe{4tBU{DYCgaRF5Utd7U z!XahEp6keTx}14tljt})P}Yj&%>zBb(+7gFo%|jJPPcNRpZ@&1h@aiXFKc@g-csyz zJGZfx#%O?!ep%+#CC+(DhfWibHJ|8Mrj#okr_rNTG+oqqlqzRu$k!7HDVkplt5Is5 zU4zDKwKbx0iBj*}+#BiZqbce%gECR=bv8IpW_Moe(_?M@fiU}5kIQ0qZs#gl-ojc} z`27L@NM^3X-qPxHZq;}AqJg#{QaNoO`>#~Lp6dCdqZW3pW`8gii5iU?d7~B#4+cq} z=^+TUmSwrvTPjqFuWYfGWz=ZP5k>sSaLx&3unbnhRRPR+z$`I z6Yvx~4g26%@I1T%e}p&TEjR@4Afk%7=s*|F#MxMh9$bJ+(2Fau5u0!wZp3!%#7)?Z z0X!9VU=X8tCJy5+JR8r)ui~Znb-Wy}#ar<TN9%BGpmHCsm=pfTSr%TDpH?3>bu7sU+k_`mT zkeMQDla1c9ie$0@ZK5I0Eg^A@Hc5!C`DHTMnT#Ze1rN^}+puE?^&2T&13qL32-4DNp7vZm@us{l{p`9!3Ow7l* zSc1o4DVE_vtieTCN9x;z&A1X*V=F0d2dQrtZowYxML(%<1pD!H)Nz0m_#E7gU%)To zCHNIm<;x6J{x04k)G)4`t)^Ov5+D31<+MQEM4p7!>8K|F+r6gRCMTjlC17(54_A8@ zl)0-GE#%;~fWWP~s>T9t7Z|fNP16c)7gBq|j1r)1u{opX;!vjAY?-c3hGtt2+ZdS# z?Tto4p$!o|$Qbhl(tJZE9A;8PF<=E_|d0D|L0SZa= zy$ZL{k!&q%8kwal4r>H)vba`(%dKd838*E~h@$=oN1nlf8C zLpAOb#88i|O4j^}@F=3H##Sq9YZTMHh{`3ldYK%768Ax(+UsgySY~xCY!*!gF4riq z8Iy3exYo!7XbJEpF4W>`ljUS^GaK7kV=}lEdxPKRdKSmHVxe{b9}RZ-^lqsETH$2q zhn;XCTmjd?-LMDlfkz1D_7lXt3a`T(1aWV}`prtKb&!RP3AuNSs9WBEVzr zc>*CfHqn)saWN45RMh+lisTLuApVKe{9iZ$Wr9ODL17s|;4+HdSnSqJioJoyZ%nzn zNwpaShA_+lFv?>%27sT(3-BVmm@D}=xsqRlH{uWQhj3rB@{<3&7<5GGS6&SRPf8;sF-S)HU4@LTELysvxplpu{n@IT5bKQR zVmV%CVK;d{ZR)g4d#>G?Oz&sr+0`-e4>?-F(cO7=dtwB{STTXe>^XLo$3NT!vzmFO zs>Ed4*~hV8Wd`tE#G2DQ*+=p1XEdSzD*ti*fVilap3XR2{3Z#6Q5 z$}^=ksk~b)Vu8O&<>#m+Ea*3>e5tF9n7nLId9}jjVNAlH@@lDTl*+5M>SD$z3@WeI ztILVbbSoy6Z*VoSAZC0jKfa>g0r$aPcpOGZQD24ENIgHG7=_Vx7NwMsYR<>w4Z6OV zt0$xDEfkImy1p5=8gzX-&p7F%s7y_F8zDI(?3dx!c;@LkydJ-U-{YbAUHE`S9aB+v zi#qm2nwpGUvwZH-p&b0+;T6eB+ZXYFj9&da5+Q-->nwNckM*n_&|^`$Lo>9KX|uFI z0Q#T;HW5~aV1Uj8R9gW}lpT-JUICpXvm80A_4BXcLh(Kk!dNke^5(n;a~Lt$L01b? z{~;=)^zB@br0pd4(iMDgJa;Rjy^@TS{^xq*e_Cdcyf&7IM=|b^@IMmp|JbBGY8F!3 zqq6ESLmY^K6r-bM_(}_ zf!ECb*N*`Fr6Pq_i(k==ujZMCtrV zIE7TSfFgrpdIL3w)U=Y`K~-^8T~6>*vD0NCs)~BcwRyn zdnH~K&t*^`Yf(*WRPab@)l9;*r0>ZSaP3J`d7m|t-?A#JnJWLXd~8C0gDa? z_qCo(6cv^18-V8VzyG%mOUvmMNUy;E&-seR|1anN F{|4+ukeC1f diff --git a/.gitignore b/.gitignore index 03e4dc10..cb73d173 100644 --- a/.gitignore +++ b/.gitignore @@ -1,57 +1,65 @@ -# Intellij IDEA -.idea - -# Jupyter Notebooks -.ipynb_checkpoints +# MacOS .DS_Store -# pip install -e generated -cdapython.egg-info # Python -*.pyc +**/*.py[cod] **/__pycache__ +dist/ +__pycache__/ +*$py.class + +# pip install -e generated +*.egg-info/ + +# Intellij +.idea/ -# pytest -.pytest_cache +# Google credentials files +**/GCS-service-account-key.* -#virtualenv -venv -venv8 -#vscode +# Virtual environment files +/venv*/ .vscode +.env -monkeytype.* +# Jupyter Notebooks +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# IPython +profile_default/ +ipython_config.py -dist #sonrqube scanner .scannerwork /notebooks/example-dev.ipynb typeReport -.DS_Store -.pymon -*.Q -*.tsv -*.csv +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage .coverage.* -.nox +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Data Files +**/*.Q +**/*.tsv +**/*.csv + +# Extras + +.pymon +monkeytype.* .pytype .mypy_cache -.coverage -.venv -venv* -venv2 -.ven3 -.venv_11 -ven3 -.nox -.pytype -venv11 -.coverage -.coverage.* config-dev.ini -.DS_Store .ruff_cache -.DS_Store -venv* -.DS_Store diff --git a/DataSummaries.ipynb b/DataSummaries.ipynb deleted file mode 100644 index f4886c9b..00000000 --- a/DataSummaries.ipynb +++ /dev/null @@ -1,1754 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "8a16a9fe", - "metadata": {}, - "source": [ - "# Summarize Search Results" - ] - }, - { - "cell_type": "markdown", - "id": "4f90b531", - "metadata": {}, - "source": [ - "The CDA provides a custom python tool for searching CDA data. [`Q`](usage/#q) (short for Query) offers several ways to search and filter data, and several input modes:\n", - "\n", - "---\n", - "- **Q.()** builds a query that can be used by `run()` or `count()`\n", - "- **Q.run()** returns data for the specified search \n", - "- **Q.count()** returns summary information (counts) data that fit the specified search\n", - "- **columns()** returns entity field names\n", - "- **unique_terms()** returns entity field contents\n", - "\n", - "---\n", - " \n", - "Before we do any work, we needs to import these functions cdapython.\n", - "We're also telling cdapython to report it's version so we can be sure we're using the one we mean to:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5245b7b0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
2022.8.29\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m2022.8\u001b[0m.\u001b[1;36m29\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import Q, columns, unique_terms, query\n", - "print(Q.get_version())\n", - "Q.set_host_url('http://localhost:8080')\n", - "Q.set_default_project_dataset(\"gdc-bq-sample.dev\")" - ] - }, - { - "cell_type": "markdown", - "id": "d538da92", - "metadata": {}, - "source": [ - "
\n", - " \n", - "CDA data comes from three sources:\n", - " \n", - " \n", - "The CDA makes this data searchable in four main endpoints:\n", - "\n", - "
    \n", - "
  • subject: A patient entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subjects privacy.
  • \n", - "
  • researchsubject: A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subjects privacy. This entity plays the role of the case_id in existing data. An individual who participates in 3 studies will have 3 researchsubject IDs
  • \n", - "
  • specimen: Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. Specimens are usually collected as an example of their kind, often for use in some investigation.
  • \n", - "
  • file: A unit of data about subjects, researchsubjects, specimens, or their associated information
  • \n", - "
\n", - " \n", - "And two endpoints that offer deeper information about data in the researchsubject endpoint:\n", - "
    \n", - "
  • diagnosis: A collection of characteristics that describe an abnormal condition of the body as assessed at a point in time. May be used to capture information about neoplastic and non-neoplastic conditions.
  • \n", - "
  • treatment: Represent medication administration or other treatment types.
  • \n", - "
\n", - "Any metadata field can be searched from any endpoint, the only difference between search types is what type of data is returned by default. This means that you can think of the CDA as a really, really enormous spreadsheet full of data. To search this enormous spreadsheet, you'd want select columns, and then filter rows.\n", - "
\n", - "\n", - "\n", - "If you are looking to build a cohort of distinct individuals who meet some criteria, search by `subject`. If you want to build a cohort, but are particularly interested in studies rather than the participates per se, search by `researchsubject`. If you are looking for biosamples that can be ordered or a specific format of information (for e.g. histological slides) start with `specimen`. If you are primarily looking for files you can reuse for your own analysis, start with `file`.\n", - "\n", - "In CDA search, these concepts can also be chained together, so you can look specifically for specimen subjects, or researchsubject diagnoses. In the four 'main' tables, all of the rows will have one or more files associated with them that can be directly found by chaining, as in specimen files. Diagnosis and treatment do not have files directly associated with them and so can only be used to find files in conjunction with the other searches.\n", - "\n", - "In all cases, any search can use any metadata field, the only difference between search types is what type of data you return by default. \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "f76026a3", - "metadata": {}, - "source": [ - "## Getting simple summary data\n", - "\n", - "Let's try a broad search of the CDA to see what information exists about cancers that were first diagnosed in the brain. To run this simple search, we would first construct a query in `Q` and save it to a variable `myquery`. This is the same query we ran in the Basic Search notebook:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "c92a98ba", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "\n", - "\n", - "\n", - "myquery = Q('primary_diagnosis_site = \"brain\"')" - ] - }, - { - "cell_type": "markdown", - "id": "6e855573", - "metadata": { - "tags": [ - "Help!" - ] - }, - "source": [ - "\n", - "
\n", - "

Where did those terms come from?

\n", - " \n", - "If you aren't sure how we knew what terms to put in our search, please refer back to the What search terms are available? notebook. \n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "4e492876", - "metadata": {}, - "source": [ - "### Overall summary\n", - "\n", - "You can get a quick summary of how many unique specimens, treatments, diagnoses, researchsubjects and subjects meet your search criteria by chaining a `count` command into the basic `run` call. " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "c8e3599f", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 15.935 sec 15935 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m15.935\u001b[0m sec \u001b[1;36m15935\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
specimen_count : 39201\n",
-       "
\n" - ], - "text/plain": [ - "specimen_count : \u001b[1;36m39201\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
treatment_count : 2386\n",
-       "
\n" - ], - "text/plain": [ - "treatment_count : \u001b[1;36m2386\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
diagnosis_count : 1756\n",
-       "
\n" - ], - "text/plain": [ - "diagnosis_count : \u001b[1;36m1756\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
mutation_count : 904\n",
-       "
\n" - ], - "text/plain": [ - "mutation_count : \u001b[1;36m904\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
researchsubject_count : 3716\n",
-       "
\n" - ], - "text/plain": [ - "researchsubject_count : \u001b[1;36m3716\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
subject_count : 2384\n",
-       "
\n" - ], - "text/plain": [ - "subject_count : \u001b[1;36m2384\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "myquery.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "d483e944", - "metadata": {}, - "source": [ - "These numbers are how many total rows of data will come back when querying the various endpoints.\n", - "\n", - "\n", - "\n", - "### subject summary\n", - "\n", - "We can also add `count`to the other run calls we did in the Basic Search notebook to get more detailed summaries:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6d9137aa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 33.746 sec 33746 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m33.746\u001b[0m sec \u001b[1;36m33746\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
    total : 2384    \n",
-       "
\n" - ], - "text/plain": [ - " total : \u001b[1;36m2384\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
  files : 4099497   \n",
-       "
\n" - ], - "text/plain": [ - " files : \u001b[1;36m4099497\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
subject_identifier_systemcount
IDC1955
PDC309
GDC1454
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexcount
None748
female653
male980
not reported3
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
racecount
None748
white1311
black or african american96
not reported136
asian33
not allowed to collect25
american indian or alaska native4
Unknown21
other9
native hawaiian or other pacific islander1
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ethnicitycount
None748
not hispanic or latino1285
not reported219
hispanic or latino85
Unknown22
not allowed to collect25
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cause_of_deathcount
None2098
Not Reported200
Cancer Related63
Not Cancer Related9
Infection3
Unknown9
Surgical Complications2
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
with flattened_result as (SELECT results.* EXCEPT(rn) FROM (SELECT ROW_NUMBER() OVER (PARTITION BY Subject.id,     \n",
-       "_subject_identifier.system, _subject_associated_project, _subject_Files) as rn, Subject.id AS subject_id,          \n",
-       "_subject_identifier.system AS subject_identifier_system, _subject_identifier.value AS subject_identifier_value,    \n",
-       "Subject.species AS species, Subject.sex AS sex, Subject.race AS race, Subject.ethnicity AS ethnicity,              \n",
-       "Subject.days_to_birth AS days_to_birth, _subject_associated_project, Subject.vital_status AS vital_status,         \n",
-       "Subject.days_to_death AS days_to_death, Subject.cause_of_death AS cause_of_death, _subject_Files FROM              \n",
-       "gdc-bq-sample.dev.all_Subjects_v3_0_final AS Subject LEFT JOIN UNNEST(Subject.ResearchSubject) AS _ResearchSubject \n",
-       "LEFT JOIN UNNEST(Subject.identifier) AS _subject_identifier LEFT JOIN UNNEST(Subject.subject_associated_project) AS\n",
-       "_subject_associated_project LEFT JOIN UNNEST(Subject.Files) AS _subject_Files WHERE                                \n",
-       "(IFNULL(UPPER(_ResearchSubject.primary_diagnosis_site), '') = UPPER('brain'))) as results WHERE rn = 1) select     \n",
-       "(SELECT COUNT(DISTINCT subject_id) from flattened_result) as total, (SELECT COUNT(DISTINCT _subject_Files) from    \n",
-       "flattened_result) as files, (select ARRAY(select as STRUCT subject_identifier_system, count(distinct subject_id) as\n",
-       "count from flattened_result group by subject_identifier_system)) as subject_identifier_system, (select ARRAY(select\n",
-       "as STRUCT sex, count(distinct subject_id) as count from flattened_result group by sex)) as sex, (select            \n",
-       "ARRAY(select as STRUCT race, count(distinct subject_id) as count from flattened_result group by race)) as race,    \n",
-       "(select ARRAY(select as STRUCT ethnicity, count(distinct subject_id) as count from flattened_result group by       \n",
-       "ethnicity)) as ethnicity, (select ARRAY(select as STRUCT cause_of_death, count(distinct subject_id) as count from  \n",
-       "flattened_result group by cause_of_death)) as cause_of_death                                                       \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;102;217;239;48;2;39;40;34mwith\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mSELECT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mresults\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m*\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mEXCEPT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrn\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mFROM\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mSELECT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mROW_NUMBER\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mOVER\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mPARTITION\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mBY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mid\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m_subject_identifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34msystem\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_associated_project\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_Files\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrn\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mid\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m_subject_identifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34msystem\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_system\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_identifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mvalue\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_value\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mspecies\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mspecies\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdays_to_birth\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdays_to_birth\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_associated_project\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mvital_status\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mvital_status\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdays_to_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdays_to_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_Files\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mFROM\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mgdc\u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m-\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mbq\u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m-\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msample\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdev\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mall_Subjects_v3_0_final\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mLEFT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mJOIN\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUNNEST\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mResearchSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_ResearchSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;102;217;239;48;2;39;40;34mLEFT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mJOIN\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUNNEST\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34midentifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_identifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mLEFT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mJOIN\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUNNEST\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_associated_project\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m_subject_associated_project\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mLEFT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mJOIN\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUNNEST\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mFiles\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_Files\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mWHERE\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mIFNULL\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUPPER\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_ResearchSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mprimary_diagnosis_site\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;230;219;116;48;2;39;40;34m''\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m=\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUPPER\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;230;219;116;48;2;39;40;34m'brain'\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mresults\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mWHERE\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrn\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m=\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;174;129;255;48;2;39;40;34m1\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mSELECT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mCOUNT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mDISTINCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mtotal\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mSELECT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mCOUNT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mDISTINCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_Files\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mfiles\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_system\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\n", - "\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_system\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_system\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\n", - "\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "myquery.subject.count.run(show_sql=True)" - ] - }, - { - "cell_type": "markdown", - "id": "dff2da08", - "metadata": {}, - "source": [ - "Since we save the output as a variable, we need to look at the variable to see the results:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "993479db", - "metadata": {}, - "outputs": [], - "source": [ - "subjectresults" - ] - }, - { - "cell_type": "markdown", - "id": "e7e6d522", - "metadata": {}, - "source": [ - "By default, the results are displayed as a table for easy previewing of the data. Since we queried the `subject` endpoint, our default results tell us `subject` level information, that is, information about unique individuals: their sex, race, age, species, etc. Using counts gives us back a nice pivot table type summary of the countable fields for Subjects. Note that above the table it also tells you the total subject count, as well as how many files are associated with those subjects." - ] - }, - { - "cell_type": "markdown", - "id": "05e52f3f", - "metadata": {}, - "source": [ - "\n", - "---\n", - "\n", - "
\n", - "\n", - "

Subject Field Definitions

\n", - "\n", - "A patient entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subjects privacy.\n", - " \n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of subjects returned.
files The number of files that match this search.
identifier.value(`system`) The identifier for the data provider.
species The taxonomic group (e.g. species) of the subject.
sex The biologic character or quality that distinguishes male and female from one another as expressed by analysis of the person's gonadal, morphologic (internal and external), chromosomal, and hormonal characteristics.
race An arbitrary classification of a taxonomic group that is a division of a species.
ethnicity An individual's self-described social and cultural grouping.
cause_of_death The cause of death, if known
\n", - "\n", - "
\n", - " \n", - "---" - ] - }, - { - "cell_type": "markdown", - "id": "5356bf41", - "metadata": {}, - "source": [ - "This gives you a quick way to assess whether the full search results will have the data fields you require. But if you want to get the underlying data for your own downstream applications, you can also get the raw numbers by calling the zeroth value of the variable:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "869dfd5d", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'NoneType' object is not subscriptable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [6]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msubjectresults\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mTypeError\u001b[0m: 'NoneType' object is not subscriptable" - ] - } - ], - "source": [ - "subjectresults[0]" - ] - }, - { - "cell_type": "markdown", - "id": "73745aa6", - "metadata": {}, - "source": [ - "### researchsubject\n", - "\n", - "If we're interested in what researchsubjects meet our criteria, we can also run our query against the researchsubject endpoint. Lets run it without saving to a variable this time to make it a bit quicker:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "a2d8c874", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: Unrecognized name: _researchsubject_identifier at [1:121]\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: Unrecognized name: _researchsubject_identifier at \u001b[1m[\u001b[0m\u001b[1;92m1:121\u001b[0m\u001b[1m]\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 1.08 sec 1080 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m1.08\u001b[0m sec \u001b[1;36m1080\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.researchsubject.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "4564beee", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "---\n", - "\n", - "
\n", - "\n", - "

ResearchSubject Field Definitions

\n", - "\n", - "A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subjects privacy. This entity plays the role of the case_id in existing data. An individual who participates in 3 studies will have 3 researchsubject IDs\n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of researchsubjects returned.
files The number of files that match this search.
identifier.value(`system`) The identifier for the data provider.
primary_diagnosis_condition The text term used to describe the type of malignant disease.
primary_diagnosis_site The text term used to describe the primary site of disease.
\n", - "
\n", - " \n", - "---" - ] - }, - { - "cell_type": "markdown", - "id": "8a67cf09", - "metadata": {}, - "source": [ - "### diagnosis\n", - "\n", - "The diagnosis endpoint is an extension of the researchsubject endpoint, and returns information about researchsubjects that have a diagnosis that meets our search criteria. :" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "7770d68c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: Unrecognized name: _diagnosis_identifier at [1:115]\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: Unrecognized name: _diagnosis_identifier at \u001b[1m[\u001b[0m\u001b[1;92m1:115\u001b[0m\u001b[1m]\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.942 sec 942 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.942\u001b[0m sec \u001b[1;36m942\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.diagnosis.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "eb73357c", - "metadata": {}, - "source": [ - "---\n", - "\n", - "
\n", - "\n", - "

Diagnosis Field Definitions

\n", - "\n", - "A collection of characteristics that describe an abnormal condition of the body as assessed at a point in time. May be used to capture information about neoplastic and non-neoplastic conditions.\n", - "\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of diagnoses returned.
identifier.value(`system`) The identifier for the data provider.
primary_diagnosis The diagnosis instance that qualified a subject for inclusion on a ResearchProject.
stage The extent of a cancer in the body.
grade The degree of abnormality of cancer cells.
\n", - "\n", - "\n", - "
\n", - " \n", - "---\n" - ] - }, - { - "cell_type": "markdown", - "id": "97442718", - "metadata": {}, - "source": [ - "### treatment\n", - "\n", - "The treatment endpoint is an extension of diagnosis and returns information about treatments undertaken on research subjects that have a given diagnosis that meets our search criteria:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "be1ac64f", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: Unrecognized name: _treatment_identifier at [1:115]\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: Unrecognized name: _treatment_identifier at \u001b[1m[\u001b[0m\u001b[1;92m1:115\u001b[0m\u001b[1m]\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.732 sec 732 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.732\u001b[0m sec \u001b[1;36m732\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.treatment.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "ec5dda7f", - "metadata": {}, - "source": [ - "\n", - "---\n", - "\n", - "
\n", - "\n", - "

Treatment Field Definitions

\n", - "\n", - " Medication administration or other treatment types. A single research subject may have multiple treatments for a single diagnosis, and/or different diagnoses, and different treatments, across different studies\n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of treatments returned.
identifier.value(`system`) The identifier for the data provider.
treatment_type The treatment type including medication/therapeutics or other procedures.
treatment_effectThe effect of a treatment on the diagnosis or tumor.
\n", - " \n", - "\n", - "
\n", - " \n", - "---\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "9c00bec8", - "metadata": {}, - "source": [ - "### specimens\n", - "\n", - "We can use this same query to see what specimens are available for brain tissue at the CDA:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "51960eed", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: Unrecognized name: _specimen_identifier at [1:114]\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: Unrecognized name: _specimen_identifier at \u001b[1m[\u001b[0m\u001b[1;92m1:114\u001b[0m\u001b[1m]\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.766 sec 766 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.766\u001b[0m sec \u001b[1;36m766\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.specimen.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "7f254547", - "metadata": {}, - "source": [ - "Nearly 40,000 specimens with over 50,000 files meet our search criteria! We would typically expect this number to be much larger than our number of subjects or research_subjects. First because studies will often take more than one sample per subject, and second because any given specimen might be aliquoted out to be used in multiple tests. " - ] - }, - { - "cell_type": "markdown", - "id": "b3ed75e5", - "metadata": {}, - "source": [ - "
\n", - "\n", - "

Specimen Field Definitions

\n", - "\n", - "Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. Specimens are usually collected as an example of their kind, often for use in some investigation.\n", - " A given specimen will have only a single subject ID and a single research subject ID\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of specimens returned.
files The number of files that match this search.
identifier.value(`system`) The identifier for the data provider.
primary_disease_type The text term used to describe the type of malignant disease.
source_material_type The general kind of material from which the specimen was derived.
specimen_type The high-level type of the specimen, based on its how it has been derived from the original extracted sample. One of: analyte, aliquot, portion, sample, or slide.
\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "5ccb0f1c", - "metadata": {}, - "source": [ - "### file\n", - "\n", - "The file endpoint returns all files that match our query:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "d6e0639a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: None\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: \u001b[3;35mNone\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.104 sec 104 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.104\u001b[0m sec \u001b[1;36m104\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.file.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "e34044c9", - "metadata": {}, - "source": [ - "There are a huge number of files (4099497) that match our search. Likely we would want to additionally filter the results by file format or data type to get only files we can use. See all the ways you can filter and refine searches with more search terms in the Operators notebook.\n", - "\n", - "\n", - "
\n", - "\n", - "

File Field Definitions

\n", - "\n", - "A file is an information-bearing electronic object that contains a physical embodiment of some information using a particular character encoding.\n", - "\n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of files returned.
identifier.value(`system`) The identifier for the data provider.
data_catagoryBroad categorization of the contents of the data file.
data_typeSpecific content type of the data file.
file_formatFormat of the data files.
\n", - "\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "a899b6ca", - "metadata": {}, - "source": [ - "### mutation\n", - "\n", - "The mutation endpoint returns all mutations that match our query:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "900f8b52", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 8.545 sec 8545 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m8.545\u001b[0m sec \u001b[1;36m8545\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
    total : 904     \n",
-       "
\n" - ], - "text/plain": [ - " total : \u001b[1;36m904\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "KeyError", - "evalue": "'ncbi_build'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m~/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/IPython/core/formatters.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[0mtype_pprinters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype_printers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 701\u001b[0m deferred_pprinters=self.deferred_printers)\n\u001b[0;32m--> 702\u001b[0;31m \u001b[0mprinter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpretty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 703\u001b[0m \u001b[0mprinter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflush\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 704\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mstream\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetvalue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/IPython/lib/pretty.py\u001b[0m in \u001b[0;36mpretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dict__\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'__repr__'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 394\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_repr_pprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcycle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 395\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_default_pprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcycle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/IPython/lib/pretty.py\u001b[0m in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m 698\u001b[0m \u001b[0;34m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 699\u001b[0m \u001b[0;31m# Find newlines and replace them with p.break_()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 700\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrepr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 701\u001b[0m \u001b[0mlines\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplitlines\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py\u001b[0m in \u001b[0;36m__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__repr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 89\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_repr_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshow_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow_sql\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/count_result.py\u001b[0m in \u001b[0;36m_repr_value\u001b[0;34m(self, show_value)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 45\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 46\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"null\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: 'ncbi_build'" - ] - } - ], - "source": [ - "myquery.mutation.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "6a4fd46e", - "metadata": {}, - "source": [ - "## Files from a single endpoint (endpoint chaining)\n", - "\n", - "If you want all file formats and data types, but only from a specific endpoint, you can also filter the file results by chaining endpoints together. This will return all the files that match our search AND that are specifically from specimens:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "2cfb2582", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: None\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: \u001b[3;35mNone\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.117 sec 117 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.117\u001b[0m sec \u001b[1;36m117\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.specimen.file.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "6994e312", - "metadata": {}, - "source": [ - "Learn more about chaining endpoints in the [Chaining endpoints](\"../AdvancedSearch-Chaining\") notebook." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "metadata": { - "interpreter": { - "hash": "5c867d1980d5b66d2bfc8a5903dcee074b4b68f3917a4f27f8a310cab24e9f1b" - } - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Untitled-2.ipynb b/Untitled-2.ipynb deleted file mode 100644 index a1b14401..00000000 --- a/Untitled-2.ipynb +++ /dev/null @@ -1,155 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                            Total execution time: 0\n",
-       "                            min 3.755 sec 3755 ms\n",
-       "                            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m3.755\u001b[0m sec \u001b[1;36m3755\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": [
-       "\u001b[?25l"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[?25h" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[10], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m set_host_url(\u001b[39m\"\u001b[39m\u001b[39mhttp://35.192.60.10:8080\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m set_table_version(\u001b[39m\"\u001b[39m\u001b[39mall_Subjects_v3_0_final\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m a \u001b[39m=\u001b[39m Q(\u001b[39m'\u001b[39;49m\u001b[39msex = \u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m%\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m AND researchsubject_identifier_system = \u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mIDC\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m'\u001b[39;49m)\u001b[39m.\u001b[39;49mrun()\u001b[39m.\u001b[39;49mget_all()\n\u001b[1;32m 7\u001b[0m \u001b[39mprint\u001b[39m(a)\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py:206\u001b[0m, in \u001b[0;36mResult.get_all\u001b[0;34m(self, output, limit, show_bar)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[39m# add this to cast to a subclass of CollectResult\u001b[39;00m\n\u001b[1;32m 202\u001b[0m collect_result: \u001b[39m\"\u001b[39m\u001b[39mCollectResult\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m=\u001b[39m cast(\n\u001b[1;32m 203\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mCollectResult\u001b[39m\u001b[39m\"\u001b[39m, ResultFactory\u001b[39m.\u001b[39mcreate_entity(COLLECT_RESULT, \u001b[39mself\u001b[39m)\n\u001b[1;32m 204\u001b[0m )\n\u001b[0;32m--> 206\u001b[0m \u001b[39mfor\u001b[39;00m index, i \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(iterator):\n\u001b[1;32m 207\u001b[0m \u001b[39mif\u001b[39;00m index \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 208\u001b[0m \u001b[39mcontinue\u001b[39;00m\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Paginator.py:156\u001b[0m, in \u001b[0;36mPaginator.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprogress\u001b[39m.\u001b[39mtasks:\n\u001b[1;32m 155\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprogress\u001b[39m.\u001b[39mremove_task(i\u001b[39m.\u001b[39mid)\n\u001b[0;32m--> 156\u001b[0m \u001b[39mraise\u001b[39;00m e\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Paginator.py:142\u001b[0m, in \u001b[0;36mPaginator.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m\n\u001b[1;32m 140\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcount \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult\u001b[39m.\u001b[39mcount\n\u001b[0;32m--> 142\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_do_next()\n\u001b[1;32m 143\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 144\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mshow_bar:\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Paginator.py:89\u001b[0m, in \u001b[0;36mPaginator._do_next\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult\u001b[39m.\u001b[39mhas_next_page:\n\u001b[1;32m 88\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 89\u001b[0m tmp_result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mresult\u001b[39m.\u001b[39;49mnext_page(limit\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlimit)\n\u001b[1;32m 90\u001b[0m \u001b[39mif\u001b[39;00m tmp_result:\n\u001b[1;32m 91\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult \u001b[39m=\u001b[39m tmp_result\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py:267\u001b[0m, in \u001b[0;36mResult.next_page\u001b[0;34m(self, limit, async_req, pre_stream)\u001b[0m\n\u001b[1;32m 265\u001b[0m _offset: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_offset \u001b[39m+\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_limit\n\u001b[1;32m 266\u001b[0m _limit: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m limit \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_limit\n\u001b[0;32m--> 267\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_result(_offset, _limit, async_req, pre_stream)\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py:294\u001b[0m, in \u001b[0;36mResult._get_result\u001b[0;34m(self, _offset, _limit, async_req, pre_stream)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_get_result\u001b[39m(\n\u001b[1;32m 288\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 289\u001b[0m _offset: \u001b[39mint\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 292\u001b[0m pre_stream: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m 293\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Union[Result, StringResult, ColumnsResult, \u001b[39mNone\u001b[39;00m]:\n\u001b[0;32m--> 294\u001b[0m \u001b[39mreturn\u001b[39;00m get_query_result(\n\u001b[1;32m 295\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m__class__\u001b[39;49m,\n\u001b[1;32m 296\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_api_instance,\n\u001b[1;32m 297\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_query_id,\n\u001b[1;32m 298\u001b[0m _offset,\n\u001b[1;32m 299\u001b[0m _limit,\n\u001b[1;32m 300\u001b[0m async_req,\n\u001b[1;32m 301\u001b[0m pre_stream,\n\u001b[1;32m 302\u001b[0m format_type\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mformat_type,\n\u001b[1;32m 303\u001b[0m )\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py:345\u001b[0m, in \u001b[0;36mget_query_result\u001b[0;34m(clz, api_instance, query_id, offset, limit, async_req, pre_stream, show_sql, show_count, format_type)\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(response, ApplyResult):\n\u001b[1;32m 343\u001b[0m response \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mget()\n\u001b[0;32m--> 345\u001b[0m sleep(\u001b[39m2.5\u001b[39;49m)\n\u001b[1;32m 346\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mtotal_row_count \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 347\u001b[0m \u001b[39mreturn\u001b[39;00m clz(\n\u001b[1;32m 348\u001b[0m response,\n\u001b[1;32m 349\u001b[0m query_id,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 355\u001b[0m format_type,\n\u001b[1;32m 356\u001b[0m )\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "from cdapython import Q,set_default_project_dataset,set_host_url,set_table_version\n", - "set_default_project_dataset(\"gdc-bq-sample.dev\")\n", - "set_host_url(\"http://35.192.60.10:8080\")\n", - "set_table_version(\"all_Subjects_v3_0_final\")\n", - "\n", - "a = Q('sex = \"%\" AND researchsubject_identifier_system = \"IDC\"').run().get_all()\n", - "print(a)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "ven3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "eae9c1a3dd8e2f898c643d98dd719bbe12e700aeabe7bf687912cfc443f15d3e" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index cce1743b..00000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,389 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "a93a79c9-352f-41cb-9113-6daadaef1d99", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'2022.7.13'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cdapython import Q,columns,unique_terms\n", - "Q.get_version()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "525765be-da36-4349-8ca7-546fc95c15d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
https://cancerdata.dsde-dev.broadinstitute.org/\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[4;94mhttps://cancerdata.dsde-dev.broadinstitute.org/\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
broad-dsde-dev.cda_dev\n",
-       "
\n" - ], - "text/plain": [ - "broad-dsde-dev.cda_dev\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "Q.set_default_project_dataset(\"broad-dsde-dev.cda_dev\")\n", - "Q.set_host_url(\"https://cancerdata.dsde-dev.broadinstitute.org/\")\n", - "\n", - "print(Q.get_host_url())\n", - "print(Q.get_default_project_dataset())" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "31a2d29e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Total execution time: 4309 ms 4.309 sec 0 min\n",
-       "
\n" - ], - "text/plain": [ - "Total execution time: \u001b[1;36m4309\u001b[0m ms \u001b[1;36m4.309\u001b[0m sec \u001b[1;36m0\u001b[0m min\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - " mylist = Q('ResearchSubject.Specimen.specimen_type= \"slide\" OR file.data_type= \"Slide Image\"').specimen.file.run().to_comma_str(\"identifier\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "051e1ff6-e493-4a39-81ad-4459fef8a980", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Total execution time: 0 min 4.989 sec 4989 ms \n",
-       "
\n" - ], - "text/plain": [ - "Total execution time: \u001b[1;36m0\u001b[0m min \u001b[1;36m4.989\u001b[0m sec \u001b[1;36m4989\u001b[0m ms \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2c67b57005d34d93b3a2b5380d6776d9", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
39864\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m39864\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "n = Q(\"sex = 'male' AND sex != 'null'\").run(limit=1000,async_call=True)\n", - "box = []\n", - "for i in n.paginator(limit=2000):\n", - " box.extend(i)\n", - "print(len(box))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "75d36ce1-b772-4120-a695-68eb39f6e3dd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                            id  \\\n",
-       "0                         2004   \n",
-       "1                         2235   \n",
-       "2                         2377   \n",
-       "3                         2458   \n",
-       "4                         3566   \n",
-       "...                        ...   \n",
-       "39859             TCGA-XK-AAJU   \n",
-       "39860             TCGA-XP-A8T6   \n",
-       "39861             TCGA-ZF-A9R5   \n",
-       "39862  UTRI_SUBJECT_001_000573   \n",
-       "39863  UTRI_SUBJECT_001_000588   \n",
-       "\n",
-       "                                              identifier       species   sex  \\\n",
-       "0                   [{'system': 'GDC', 'value': '2004'}]  homo sapiens  male   \n",
-       "1                   [{'system': 'GDC', 'value': '2235'}]  homo sapiens  male   \n",
-       "2                   [{'system': 'GDC', 'value': '2377'}]  homo sapiens  male   \n",
-       "3                   [{'system': 'GDC', 'value': '2458'}]  homo sapiens  male   \n",
-       "4                   [{'system': 'GDC', 'value': '3566'}]  homo sapiens  male   \n",
-       "...                                                  ...           ...   ...   \n",
-       "39859  [{'system': 'GDC', 'value': 'TCGA-XK-AAJU'}, {...  homo sapiens  male   \n",
-       "39860  [{'system': 'GDC', 'value': 'TCGA-XP-A8T6'}, {...  homo sapiens  male   \n",
-       "39861  [{'system': 'GDC', 'value': 'TCGA-ZF-A9R5'}, {...  homo sapiens  male   \n",
-       "39862  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "39863  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "\n",
-       "                            race               ethnicity  days_to_birth  \\\n",
-       "0                          white  not hispanic or latino            NaN   \n",
-       "1                        Unknown                 Unknown            NaN   \n",
-       "2      black or african american                 Unknown            NaN   \n",
-       "3                        Unknown                 Unknown            NaN   \n",
-       "4                   not reported            not reported            NaN   \n",
-       "...                          ...                     ...            ...   \n",
-       "39859                      white            not reported       -23958.0   \n",
-       "39860  black or african american      hispanic or latino       -19886.0   \n",
-       "39861                      white  not hispanic or latino       -21811.0   \n",
-       "39862               not reported            not reported            NaN   \n",
-       "39863               not reported            not reported            NaN   \n",
-       "\n",
-       "      subject_associated_project  vital_status  days_to_death cause_of_death  \n",
-       "0            [BEATAML1.0-COHORT]          Dead            NaN           None  \n",
-       "1            [BEATAML1.0-COHORT]          Dead            NaN           None  \n",
-       "2            [BEATAML1.0-COHORT]         Alive            NaN           None  \n",
-       "3            [BEATAML1.0-COHORT]          Dead            NaN           None  \n",
-       "4                     [OHSU-CNL]          Dead            NaN           None  \n",
-       "...                          ...           ...            ...            ...  \n",
-       "39859     [TCGA-PRAD, tcga_prad]         Alive            NaN           None  \n",
-       "39860     [tcga_esca, TCGA-ESCA]          Dead          763.0           None  \n",
-       "39861     [TCGA-BLCA, tcga_blca]         Alive            NaN           None  \n",
-       "39862                 [TRIO-CRU]  Not Reported            NaN           None  \n",
-       "39863                 [TRIO-CRU]  Not Reported            NaN           None  \n",
-       "\n",
-       "[39864 rows x 11 columns]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[37;44m id \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2004\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2235\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2377\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2458\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m3566\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39859\u001b[0m\u001b[37;44m TCGA-XK-AAJU \u001b[0m\n", - "\u001b[1;36;44m39860\u001b[0m\u001b[37;44m TCGA-XP-A8T6 \u001b[0m\n", - "\u001b[1;36;44m39861\u001b[0m\u001b[37;44m TCGA-ZF-A9R5 \u001b[0m\n", - "\u001b[1;36;44m39862\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000573 \u001b[0m\n", - "\u001b[1;36;44m39863\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000588 \u001b[0m\n", - "\n", - "\u001b[37;44m identifier species sex \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2004'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2235'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2377'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2458'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'3566'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39859\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-XK-AAJU'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m39860\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-XP-A8T6'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m39861\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-ZF-A9R5'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m39862\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m39863\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\n", - "\u001b[37;44m race ethnicity days_to_birth \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m Unknown Unknown NaN \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m black or african american Unknown NaN \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m Unknown Unknown NaN \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39859\u001b[0m\u001b[37;44m white not reported \u001b[0m\u001b[1;36;44m-23958.0\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39860\u001b[0m\u001b[37;44m black or african american hispanic or latino \u001b[0m\u001b[1;36;44m-19886.0\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39861\u001b[0m\u001b[37;44m white not hispanic or latino \u001b[0m\u001b[1;36;44m-21811.0\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39862\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\u001b[1;36;44m39863\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\n", - "\u001b[37;44m subject_associated_project vital_status days_to_death cause_of_death \u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mOHSU-CNL\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39859\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTCGA-PRAD, tcga_prad\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39860\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mtcga_esca, TCGA-ESCA\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead \u001b[0m\u001b[1;36;44m763.0\u001b[0m\u001b[37;44m \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39861\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTCGA-BLCA, tcga_blca\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39862\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39863\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\n", - "\u001b[1;37;44m[\u001b[0m\u001b[1;36;44m39864\u001b[0m\u001b[37;44m rows x \u001b[0m\u001b[1;36;44m11\u001b[0m\u001b[37;44m columns\u001b[0m\u001b[1;37;44m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import pandas as pd\n", - "df = pd.DataFrame(box)\n", - "print(df, style=\"white on blue\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "284ab4e3-8c39-452c-bcae-2a6d08b9b29e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Untitled1.ipynb b/Untitled1.ipynb deleted file mode 100644 index f72340f3..00000000 --- a/Untitled1.ipynb +++ /dev/null @@ -1,457 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "b974248c-48b3-4233-8211-4677aa63d377", - "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": "/*! jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */\n!function(e,t){\"use strict\";\"object\"==typeof module&&\"object\"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error(\"jQuery requires a window with a document\");return t(e)}:t(e)}(\"undefined\"!=typeof window?window:this,function(C,e){\"use strict\";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return\"function\"==typeof e&&\"number\"!=typeof e.nodeType&&\"function\"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement(\"script\");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+\"\":\"object\"==typeof e||\"function\"==typeof e?n[o.call(e)]||\"object\":typeof e}var f=\"3.6.0\",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&\"length\"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&(\"array\"===n||0===t||\"number\"==typeof t&&0+~]|\"+M+\")\"+M+\"*\"),U=new RegExp(M+\"|>\"),X=new RegExp(F),V=new RegExp(\"^\"+I+\"$\"),G={ID:new RegExp(\"^#(\"+I+\")\"),CLASS:new RegExp(\"^\\\\.(\"+I+\")\"),TAG:new RegExp(\"^(\"+I+\"|[*])\"),ATTR:new RegExp(\"^\"+W),PSEUDO:new RegExp(\"^\"+F),CHILD:new RegExp(\"^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\\\(\"+M+\"*(even|odd|(([+-]|)(\\\\d*)n|)\"+M+\"*(?:([+-]|)\"+M+\"*(\\\\d+)|))\"+M+\"*\\\\)|)\",\"i\"),bool:new RegExp(\"^(?:\"+R+\")$\",\"i\"),needsContext:new RegExp(\"^\"+M+\"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\\\(\"+M+\"*((?:-\\\\d)?\\\\d*)\"+M+\"*\\\\)|)(?=[^-]|$)\",\"i\")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\\d$/i,K=/^[^{]+\\{\\s*\\[native \\w/,Z=/^(?:#([\\w-]+)|(\\w+)|\\.([\\w-]+))$/,ee=/[+~]/,te=new RegExp(\"\\\\\\\\[\\\\da-fA-F]{1,6}\"+M+\"?|\\\\\\\\([^\\\\r\\\\n\\\\f])\",\"g\"),ne=function(e,t){var n=\"0x\"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\\0-\\x1f\\x7f]|^-?\\d)|^-$|[^\\0-\\x1f\\x7f-\\uFFFF\\w-]/g,ie=function(e,t){return t?\"\\0\"===e?\"\\ufffd\":e.slice(0,-1)+\"\\\\\"+e.charCodeAt(e.length-1).toString(16)+\" \":\"\\\\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&\"fieldset\"===e.nodeName.toLowerCase()},{dir:\"parentNode\",next:\"legend\"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],\"string\"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+\" \"]&&(!v||!v.test(t))&&(1!==p||\"object\"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute(\"id\"))?s=s.replace(re,ie):e.setAttribute(\"id\",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?\"#\"+s:\":scope\")+\" \"+xe(l[o]);c=l.join(\",\")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute(\"id\")}}}return g(t.replace($,\"$1\"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+\" \")>b.cacheLength&&delete e[r.shift()],e[t+\" \"]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement(\"fieldset\");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split(\"|\"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return\"input\"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return(\"input\"===t||\"button\"===t)&&e.type===n}}function ge(t){return function(e){return\"form\"in e?e.parentNode&&!1===e.disabled?\"label\"in e?\"label\"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:\"label\"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&\"undefined\"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||\"HTML\")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener(\"unload\",oe,!1):n.attachEvent&&n.attachEvent(\"onunload\",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement(\"div\")),\"undefined\"!=typeof e.querySelectorAll&&!e.querySelectorAll(\":scope fieldset div\").length}),d.attributes=ce(function(e){return e.className=\"i\",!e.getAttribute(\"className\")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment(\"\")),!e.getElementsByTagName(\"*\").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute(\"id\")===t}},b.find.ID=function(e,t){if(\"undefined\"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t=\"undefined\"!=typeof e.getAttributeNode&&e.getAttributeNode(\"id\");return t&&t.value===n}},b.find.ID=function(e,t){if(\"undefined\"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode(\"id\"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode(\"id\"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return\"undefined\"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if(\"*\"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if(\"undefined\"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML=\"\",e.querySelectorAll(\"[msallowcapture^='']\").length&&v.push(\"[*^$]=\"+M+\"*(?:''|\\\"\\\")\"),e.querySelectorAll(\"[selected]\").length||v.push(\"\\\\[\"+M+\"*(?:value|\"+R+\")\"),e.querySelectorAll(\"[id~=\"+S+\"-]\").length||v.push(\"~=\"),(t=C.createElement(\"input\")).setAttribute(\"name\",\"\"),e.appendChild(t),e.querySelectorAll(\"[name='']\").length||v.push(\"\\\\[\"+M+\"*name\"+M+\"*=\"+M+\"*(?:''|\\\"\\\")\"),e.querySelectorAll(\":checked\").length||v.push(\":checked\"),e.querySelectorAll(\"a#\"+S+\"+*\").length||v.push(\".#.+[+~]\"),e.querySelectorAll(\"\\\\\\f\"),v.push(\"[\\\\r\\\\n\\\\f]\")}),ce(function(e){e.innerHTML=\"\";var t=C.createElement(\"input\");t.setAttribute(\"type\",\"hidden\"),e.appendChild(t).setAttribute(\"name\",\"D\"),e.querySelectorAll(\"[name=d]\").length&&v.push(\"name\"+M+\"*[*^$|!~]?=\"),2!==e.querySelectorAll(\":enabled\").length&&v.push(\":enabled\",\":disabled\"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(\":disabled\").length&&v.push(\":enabled\",\":disabled\"),e.querySelectorAll(\"*,:x\"),v.push(\",.*:\")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,\"*\"),c.call(e,\"[s!='']:x\"),s.push(\"!=\",F)}),v=v.length&&new RegExp(v.join(\"|\")),s=s.length&&new RegExp(s.join(\"|\")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+\" \"]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0\":{dir:\"parentNode\",first:!0},\" \":{dir:\"parentNode\"},\"+\":{dir:\"previousSibling\",first:!0},\"~\":{dir:\"previousSibling\"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||\"\").replace(te,ne),\"~=\"===e[2]&&(e[3]=\" \"+e[3]+\" \"),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),\"nth\"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*(\"even\"===e[3]||\"odd\"===e[3])),e[5]=+(e[7]+e[8]||\"odd\"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||\"\":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(\")\",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return\"*\"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+\" \"];return t||(t=new RegExp(\"(^|\"+M+\")\"+e+\"(\"+M+\"|$)\"))&&m(e,function(e){return t.test(\"string\"==typeof e.className&&e.className||\"undefined\"!=typeof e.getAttribute&&e.getAttribute(\"class\")||\"\")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?\"!=\"===r:!r||(t+=\"\",\"=\"===r?t===i:\"!=\"===r?t!==i:\"^=\"===r?i&&0===t.indexOf(i):\"*=\"===r?i&&-1\",\"#\"===e.firstChild.getAttribute(\"href\")})||fe(\"type|href|height|width\",function(e,t,n){if(!n)return e.getAttribute(t,\"type\"===t.toLowerCase()?1:2)}),d.attributes&&ce(function(e){return e.innerHTML=\"\",e.firstChild.setAttribute(\"value\",\"\"),\"\"===e.firstChild.getAttribute(\"value\")})||fe(\"value\",function(e,t,n){if(!n&&\"input\"===e.nodeName.toLowerCase())return e.defaultValue}),ce(function(e){return null==e.getAttribute(\"disabled\")})||fe(R,function(e,t,n){var r;if(!n)return!0===e[t]?t.toLowerCase():(r=e.getAttributeNode(t))&&r.specified?r.value:null}),se}(C);S.find=d,S.expr=d.selectors,S.expr[\":\"]=S.expr.pseudos,S.uniqueSort=S.unique=d.uniqueSort,S.text=d.getText,S.isXMLDoc=d.isXML,S.contains=d.contains,S.escapeSelector=d.escape;var h=function(e,t,n){var r=[],i=void 0!==n;while((e=e[t])&&9!==e.nodeType)if(1===e.nodeType){if(i&&S(e).is(n))break;r.push(e)}return r},T=function(e,t){for(var n=[];e;e=e.nextSibling)1===e.nodeType&&e!==t&&n.push(e);return n},k=S.expr.match.needsContext;function A(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerCase()}var N=/^<([a-z][^\\/\\0>:\\x20\\t\\r\\n\\f]*)[\\x20\\t\\r\\n\\f]*\\/?>(?:<\\/\\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):\"string\"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,\"string\"==typeof e){if(!(r=\"<\"===e[0]&&\">\"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\\x20\\t\\r\\n\\f]*)/i,he=/^$|^module$|\\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement(\"div\")),(fe=E.createElement(\"input\")).setAttribute(\"type\",\"radio\"),fe.setAttribute(\"checked\",\"checked\"),fe.setAttribute(\"name\",\"t\"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML=\"\",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML=\"\",y.option=!!ce.lastChild;var ge={thead:[1,\"\",\"
\"],col:[2,\"\",\"
\"],tr:[2,\"\",\"
\"],td:[3,\"\",\"
\"],_default:[0,\"\",\"\"]};function ve(e,t){var n;return n=\"undefined\"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||\"*\"):\"undefined\"!=typeof e.querySelectorAll?e.querySelectorAll(t||\"*\"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n\",\"\"]);var me=/<|&#?\\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\\s*$/g;function je(e,t){return A(e,\"table\")&&A(11!==t.nodeType?t:t.firstChild,\"tr\")&&S(e).children(\"tbody\")[0]||e}function De(e){return e.type=(null!==e.getAttribute(\"type\"))+\"/\"+e.type,e}function qe(e){return\"true/\"===(e.type||\"\").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute(\"type\"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,\"handle events\"),s)for(n=0,r=s[i].length;n\").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on(\"load error\",i=function(e){r.remove(),i=null,e&&t(\"error\"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\\?(?=&|$)|\\?\\?/;S.ajaxSetup({jsonp:\"callback\",jsonpCallback:function(){var e=zt.pop()||S.expando+\"_\"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter(\"json jsonp\",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?\"url\":\"string\"==typeof e.data&&0===(e.contentType||\"\").indexOf(\"application/x-www-form-urlencoded\")&&Ut.test(e.data)&&\"data\");if(a||\"jsonp\"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,\"$1\"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?\"&\":\"?\")+e.jsonp+\"=\"+r),e.converters[\"script json\"]=function(){return o||S.error(r+\" was not called\"),o[0]},e.dataTypes[0]=\"json\",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),\"script\"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument(\"\").body).innerHTML=\"
\",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return\"string\"!=typeof e?[]:(\"boolean\"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument(\"\")).createElement(\"base\")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(\" \");return-1\").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,\"position\"),c=S(e),f={};\"static\"===l&&(e.style.position=\"relative\"),s=c.offset(),o=S.css(e,\"top\"),u=S.css(e,\"left\"),(\"absolute\"===l||\"fixed\"===l)&&-1<(o+u).indexOf(\"auto\")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),\"using\"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if(\"fixed\"===S.css(r,\"position\"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&\"static\"===S.css(e,\"position\"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,\"borderTopWidth\",!0),i.left+=S.css(e,\"borderLeftWidth\",!0))}return{top:t.top-i.top-S.css(r,\"marginTop\",!0),left:t.left-i.left-S.css(r,\"marginLeft\",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&\"static\"===S.css(e,\"position\"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:\"pageXOffset\",scrollTop:\"pageYOffset\"},function(t,i){var o=\"pageYOffset\"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each([\"top\",\"left\"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+\"px\":t})}),S.each({Height:\"height\",Width:\"width\"},function(a,s){S.each({padding:\"inner\"+a,content:s,\"\":\"outer\"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||\"boolean\"!=typeof e),i=r||(!0===e||!0===t?\"margin\":\"border\");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf(\"outer\")?e[\"inner\"+a]:e.document.documentElement[\"client\"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body[\"scroll\"+a],r[\"scroll\"+a],e.body[\"offset\"+a],r[\"offset\"+a],r[\"client\"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each([\"ajaxStart\",\"ajaxStop\",\"ajaxComplete\",\"ajaxError\",\"ajaxSuccess\",\"ajaxSend\"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 1===arguments.length?this.off(e,\"**\"):this.off(t,e||\"**\",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each(\"blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu\".split(\" \"),function(e,n){S.fn[n]=function(e,t){return 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import Q,unique_terms\n", - "from itables import init_notebook_mode\n", - "\n", - "init_notebook_mode(all_interactive=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1764bcf2-8ecf-45c8-b617-fe239ceb7b95", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
http://35.192.60.10:8080/\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[4;94mhttp://35.192.60.10:8080/\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
gdc-bq-sample.dev\n",
-       "
\n" - ], - "text/plain": [ - "gdc-bq-sample.dev\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "integration_host = \"http://35.192.60.10:8080/\"\n", - "localhost = \"http://localhost:8080\"\n", - "broad_dev = \"https://cancerdata.dsde-dev.broadinstitute.org/\"\n", - "project_in = \"gdc-bq-sample.dev\"\n", - "project_broad_dev = \"broad-dsde-dev.cda_dev\"\n", - "Q.set_default_project_dataset(project_in)\n", - "Q.set_host_url(integration_host)\n", - "\n", - "print(Q.get_host_url())\n", - "print(Q.get_default_project_dataset())\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81058c54-afe6-4592-b5e7-46c61e646ff1", - "metadata": {}, - "outputs": [], - "source": [ - "mylist = Q('ResearchSubject.Specimen.specimen_type= \"slide\" OR file.data_type = \"Slide Image\"').specimen.file.run(filter=\"id\"\n", - ", show_sql=True)\n", - "df = mylist.to_dataframe()\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cf27f129", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "slidequery = Q(\n", - " 'File.data_type = \"Slide Image\" OR ResearchSubject.Specimen.source_material_type = \"Slides\" OR ResearchSubject.Specimen.specimen_type = \"slide\"'\n", - ")\n", - "\n", - "cptacquery = Q(\n", - " 'File.associated_project = \"%cptac%\" OR subject_associated_project = \"%cptac%\" OR ResearchSubject.member_of_research_project = \"%cptac%\" OR ResearchSubject.Specimen.associated_project = \"%cptac%\"'\n", - ")\n", - "\n", - "myquery = slidequery.AND(cptacquery)\n", - "\n", - "\n", - "\n", - "\n", - "import pandas as pd\n", - "from pandas import DataFrame\n", - "t = myquery.specimen.run()\n", - "\n", - "d = t.join_as_str(\"subject_id\")\n", - "b = t.join_as_str(\"primary_disease_type\")\n", - "\n", - "\n", - "\n", - "\n", - "a = DataFrame([d])\n", - "b2 = DataFrame([b])\n", - "\n", - "\n", - "v = pd.merge(a,b2)\n", - "v\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "beaec8e2", - "metadata": {}, - "outputs": [], - "source": [ - "slidequery = Q(\n", - " 'File.data_type = \"Slide Image\" OR ResearchSubject.Specimen.source_material_type = \"Slides\" OR ResearchSubject.Specimen.specimen_type = \"slide\"')\n", - "\n", - "subids = slidequery.subject.run(filter='id', limit=100000).join_as_str(key=\"id\",delimiter=\",\")\n", - "subfiles = Q(f'id IN ({subids})').to_json()\n", - "print(subfiles)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "658895d4-6a85-4224-a610-3cdd53b450d3", - "metadata": {}, - "outputs": [], - "source": [ - "p = mylist.auto_paginator(to_df=True,limit=20000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "244175d4", - "metadata": {}, - "outputs": [], - "source": [ - "p.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1b65a54-92aa-4d76-ab06-d9d8dd18ce33", - "metadata": {}, - "outputs": [], - "source": [ - "p.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5182c3c-1338-445e-922c-40080f784254", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "d = unique_terms(\n", - " \"species\", host=localhost, table=\"gdc-bq-sample.dev\", show_sql=True\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20d0de23-1a78-4bdc-bd28-0af00b6b44c0", - "metadata": {}, - "outputs": [], - "source": [ - "d.to_list()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81f0a38a-3cdf-4330-82bd-850cf2c094d6", - "metadata": {}, - "outputs": [], - "source": [ - "d = Q('File.associated_project = \"%cptac%\"').file.run(limit=2000,async_call=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "78e7ec96", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;225;190;106;48;2;64;176;166mGetting results from database\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Total execution time: 0 min 4.243 sec 4243 ms \n",
-       "
\n" - ], - "text/plain": [ - "Total execution time: \u001b[1;36m0\u001b[0m min \u001b[1;36m4.243\u001b[0m sec \u001b[1;36m4243\u001b[0m ms \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\"white\",\"not reported\",\"white\",\"white\",\"white\",\"white\",\"white\",\"white\",\"white\",\"white\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"asian\",\"not reported\",\"white\",\"white\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"white\",\"white\",\"white\",\"not reported\",\"not reported\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"not reported\",\"white\",\"black or african american\",\"white\",\"not reported\",\"not \n",
-       "reported\",\"not reported\",\"not reported\",\"not reported\",\"white\",\"white\",\"white\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"not reported\",\"None\",\"white\",\"black or african \n",
-       "american\",\"white\",\"white\",\"white\",\"white\",\"not reported\",\"not reported\",\"white\",\"not reported\",\"white\",\"white\",\"not\n",
-       "reported\",\"white\",\"not reported\",\"white\",\"black or african american\",\"white\",\"white\",\"white\",\"not reported\",\"not \n",
-       "reported\",\"white\",\"not reported\",\"white\",\"white\",\"not reported\",\"not reported\",\"not reported\",\"not \n",
-       "reported\",\"white\",\"white\",\"not reported\",\"white\",\"white\",\"white\",\"white\",\"white\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"black or african american\",\"white\",\"white\",\"not reported\",\"black or african \n",
-       "american\",\"black or african american\",\"black or african american\",\"white\",\"not reported\"\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"asian\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"None\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"black or african \u001b[0m\n", - "\u001b[32mamerican\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not\u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"black or african \u001b[0m\n", - "\u001b[32mamerican\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "\n", - "Q.set_default_project_dataset(\"http://35.192.60.10:8080/\"\n", - "Q.set_host_url(\"gdc-bq-sample.dev\")\n", - "\n", - "d = Q('ResearchSubject.Specimen.specimen_type = \"slide\"').specimen.run(\n", - " filter=\"\"\"\n", - " id:r_id \n", - " species:things\n", - " sex:gender\n", - " race:me\n", - " ethnicity:like_race\n", - " days_to_birth:born\n", - " subject_associated_project\n", - " vital_status \n", - " days_to_death\n", - " cause_of_death \n", - " identifier\n", - " File.label\n", - " File.data_category\n", - " File.data_type\n", - " File.file_format\n", - " File.data_modality\"\"\",\n", - ")\n", - "\n", - "\n", - "d = d.auto_paginator(limit=4000,to_df=True)\n", - "# print(d.to_dataframe())\n", - "\n", - "print(d.join_as_str(key=\"me\", delimiter=\",\"))\n", - " \n", - " \n", - "import pandas as pd \n", - "pd.DataFrame(d[\"r_id\"]).value_counts().plot(kind=\"bar\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "b6a6ef1d-6b1f-4a54-a8e9-3d9ee2983f50", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "97aee5f58b7d42c4b7df2993cde304bf", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "31b7a275-1270-47ec-98f8-0b9ffba5cfbb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/mutation_tests (1).ipynb b/mutation_tests (1).ipynb deleted file mode 100644 index 1af4fa79..00000000 --- a/mutation_tests (1).ipynb +++ /dev/null @@ -1,410 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "2472f1c8-cbda-431a-92f6-a950f6aa500a", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "application/javascript": "/*! jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */\n!function(e,t){\"use strict\";\"object\"==typeof module&&\"object\"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error(\"jQuery requires a window with a document\");return t(e)}:t(e)}(\"undefined\"!=typeof window?window:this,function(C,e){\"use strict\";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return\"function\"==typeof e&&\"number\"!=typeof e.nodeType&&\"function\"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement(\"script\");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+\"\":\"object\"==typeof e||\"function\"==typeof e?n[o.call(e)]||\"object\":typeof e}var f=\"3.6.0\",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&\"length\"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&(\"array\"===n||0===t||\"number\"==typeof t&&0+~]|\"+M+\")\"+M+\"*\"),U=new RegExp(M+\"|>\"),X=new RegExp(F),V=new RegExp(\"^\"+I+\"$\"),G={ID:new RegExp(\"^#(\"+I+\")\"),CLASS:new RegExp(\"^\\\\.(\"+I+\")\"),TAG:new RegExp(\"^(\"+I+\"|[*])\"),ATTR:new RegExp(\"^\"+W),PSEUDO:new RegExp(\"^\"+F),CHILD:new RegExp(\"^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\\\(\"+M+\"*(even|odd|(([+-]|)(\\\\d*)n|)\"+M+\"*(?:([+-]|)\"+M+\"*(\\\\d+)|))\"+M+\"*\\\\)|)\",\"i\"),bool:new RegExp(\"^(?:\"+R+\")$\",\"i\"),needsContext:new RegExp(\"^\"+M+\"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\\\(\"+M+\"*((?:-\\\\d)?\\\\d*)\"+M+\"*\\\\)|)(?=[^-]|$)\",\"i\")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\\d$/i,K=/^[^{]+\\{\\s*\\[native \\w/,Z=/^(?:#([\\w-]+)|(\\w+)|\\.([\\w-]+))$/,ee=/[+~]/,te=new RegExp(\"\\\\\\\\[\\\\da-fA-F]{1,6}\"+M+\"?|\\\\\\\\([^\\\\r\\\\n\\\\f])\",\"g\"),ne=function(e,t){var n=\"0x\"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\\0-\\x1f\\x7f]|^-?\\d)|^-$|[^\\0-\\x1f\\x7f-\\uFFFF\\w-]/g,ie=function(e,t){return t?\"\\0\"===e?\"\\ufffd\":e.slice(0,-1)+\"\\\\\"+e.charCodeAt(e.length-1).toString(16)+\" \":\"\\\\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&\"fieldset\"===e.nodeName.toLowerCase()},{dir:\"parentNode\",next:\"legend\"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],\"string\"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+\" \"]&&(!v||!v.test(t))&&(1!==p||\"object\"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute(\"id\"))?s=s.replace(re,ie):e.setAttribute(\"id\",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?\"#\"+s:\":scope\")+\" \"+xe(l[o]);c=l.join(\",\")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute(\"id\")}}}return g(t.replace($,\"$1\"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+\" \")>b.cacheLength&&delete e[r.shift()],e[t+\" \"]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement(\"fieldset\");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split(\"|\"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return\"input\"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return(\"input\"===t||\"button\"===t)&&e.type===n}}function ge(t){return function(e){return\"form\"in e?e.parentNode&&!1===e.disabled?\"label\"in e?\"label\"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:\"label\"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&\"undefined\"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||\"HTML\")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener(\"unload\",oe,!1):n.attachEvent&&n.attachEvent(\"onunload\",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement(\"div\")),\"undefined\"!=typeof e.querySelectorAll&&!e.querySelectorAll(\":scope fieldset div\").length}),d.attributes=ce(function(e){return e.className=\"i\",!e.getAttribute(\"className\")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment(\"\")),!e.getElementsByTagName(\"*\").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute(\"id\")===t}},b.find.ID=function(e,t){if(\"undefined\"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t=\"undefined\"!=typeof e.getAttributeNode&&e.getAttributeNode(\"id\");return t&&t.value===n}},b.find.ID=function(e,t){if(\"undefined\"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode(\"id\"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode(\"id\"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return\"undefined\"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if(\"*\"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if(\"undefined\"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML=\"\",e.querySelectorAll(\"[msallowcapture^='']\").length&&v.push(\"[*^$]=\"+M+\"*(?:''|\\\"\\\")\"),e.querySelectorAll(\"[selected]\").length||v.push(\"\\\\[\"+M+\"*(?:value|\"+R+\")\"),e.querySelectorAll(\"[id~=\"+S+\"-]\").length||v.push(\"~=\"),(t=C.createElement(\"input\")).setAttribute(\"name\",\"\"),e.appendChild(t),e.querySelectorAll(\"[name='']\").length||v.push(\"\\\\[\"+M+\"*name\"+M+\"*=\"+M+\"*(?:''|\\\"\\\")\"),e.querySelectorAll(\":checked\").length||v.push(\":checked\"),e.querySelectorAll(\"a#\"+S+\"+*\").length||v.push(\".#.+[+~]\"),e.querySelectorAll(\"\\\\\\f\"),v.push(\"[\\\\r\\\\n\\\\f]\")}),ce(function(e){e.innerHTML=\"\";var t=C.createElement(\"input\");t.setAttribute(\"type\",\"hidden\"),e.appendChild(t).setAttribute(\"name\",\"D\"),e.querySelectorAll(\"[name=d]\").length&&v.push(\"name\"+M+\"*[*^$|!~]?=\"),2!==e.querySelectorAll(\":enabled\").length&&v.push(\":enabled\",\":disabled\"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(\":disabled\").length&&v.push(\":enabled\",\":disabled\"),e.querySelectorAll(\"*,:x\"),v.push(\",.*:\")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,\"*\"),c.call(e,\"[s!='']:x\"),s.push(\"!=\",F)}),v=v.length&&new RegExp(v.join(\"|\")),s=s.length&&new RegExp(s.join(\"|\")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+\" \"]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0\":{dir:\"parentNode\",first:!0},\" \":{dir:\"parentNode\"},\"+\":{dir:\"previousSibling\",first:!0},\"~\":{dir:\"previousSibling\"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||\"\").replace(te,ne),\"~=\"===e[2]&&(e[3]=\" \"+e[3]+\" \"),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),\"nth\"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*(\"even\"===e[3]||\"odd\"===e[3])),e[5]=+(e[7]+e[8]||\"odd\"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||\"\":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(\")\",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return\"*\"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+\" \"];return t||(t=new RegExp(\"(^|\"+M+\")\"+e+\"(\"+M+\"|$)\"))&&m(e,function(e){return t.test(\"string\"==typeof e.className&&e.className||\"undefined\"!=typeof e.getAttribute&&e.getAttribute(\"class\")||\"\")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?\"!=\"===r:!r||(t+=\"\",\"=\"===r?t===i:\"!=\"===r?t!==i:\"^=\"===r?i&&0===t.indexOf(i):\"*=\"===r?i&&-1\",\"#\"===e.firstChild.getAttribute(\"href\")})||fe(\"type|href|height|width\",function(e,t,n){if(!n)return e.getAttribute(t,\"type\"===t.toLowerCase()?1:2)}),d.attributes&&ce(function(e){return e.innerHTML=\"\",e.firstChild.setAttribute(\"value\",\"\"),\"\"===e.firstChild.getAttribute(\"value\")})||fe(\"value\",function(e,t,n){if(!n&&\"input\"===e.nodeName.toLowerCase())return e.defaultValue}),ce(function(e){return null==e.getAttribute(\"disabled\")})||fe(R,function(e,t,n){var r;if(!n)return!0===e[t]?t.toLowerCase():(r=e.getAttributeNode(t))&&r.specified?r.value:null}),se}(C);S.find=d,S.expr=d.selectors,S.expr[\":\"]=S.expr.pseudos,S.uniqueSort=S.unique=d.uniqueSort,S.text=d.getText,S.isXMLDoc=d.isXML,S.contains=d.contains,S.escapeSelector=d.escape;var h=function(e,t,n){var r=[],i=void 0!==n;while((e=e[t])&&9!==e.nodeType)if(1===e.nodeType){if(i&&S(e).is(n))break;r.push(e)}return r},T=function(e,t){for(var n=[];e;e=e.nextSibling)1===e.nodeType&&e!==t&&n.push(e);return n},k=S.expr.match.needsContext;function A(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerCase()}var N=/^<([a-z][^\\/\\0>:\\x20\\t\\r\\n\\f]*)[\\x20\\t\\r\\n\\f]*\\/?>(?:<\\/\\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):\"string\"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,\"string\"==typeof e){if(!(r=\"<\"===e[0]&&\">\"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\\x20\\t\\r\\n\\f]*)/i,he=/^$|^module$|\\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement(\"div\")),(fe=E.createElement(\"input\")).setAttribute(\"type\",\"radio\"),fe.setAttribute(\"checked\",\"checked\"),fe.setAttribute(\"name\",\"t\"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML=\"\",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML=\"\",y.option=!!ce.lastChild;var ge={thead:[1,\"\",\"
\"],col:[2,\"\",\"
\"],tr:[2,\"\",\"
\"],td:[3,\"\",\"
\"],_default:[0,\"\",\"\"]};function ve(e,t){var n;return n=\"undefined\"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||\"*\"):\"undefined\"!=typeof e.querySelectorAll?e.querySelectorAll(t||\"*\"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n\",\"\"]);var me=/<|&#?\\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\\s*$/g;function je(e,t){return A(e,\"table\")&&A(11!==t.nodeType?t:t.firstChild,\"tr\")&&S(e).children(\"tbody\")[0]||e}function De(e){return e.type=(null!==e.getAttribute(\"type\"))+\"/\"+e.type,e}function qe(e){return\"true/\"===(e.type||\"\").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute(\"type\"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,\"handle events\"),s)for(n=0,r=s[i].length;n\").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on(\"load error\",i=function(e){r.remove(),i=null,e&&t(\"error\"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\\?(?=&|$)|\\?\\?/;S.ajaxSetup({jsonp:\"callback\",jsonpCallback:function(){var e=zt.pop()||S.expando+\"_\"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter(\"json jsonp\",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?\"url\":\"string\"==typeof e.data&&0===(e.contentType||\"\").indexOf(\"application/x-www-form-urlencoded\")&&Ut.test(e.data)&&\"data\");if(a||\"jsonp\"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,\"$1\"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?\"&\":\"?\")+e.jsonp+\"=\"+r),e.converters[\"script json\"]=function(){return o||S.error(r+\" was not called\"),o[0]},e.dataTypes[0]=\"json\",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),\"script\"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument(\"\").body).innerHTML=\"
\",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return\"string\"!=typeof e?[]:(\"boolean\"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument(\"\")).createElement(\"base\")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(\" \");return-1\").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,\"position\"),c=S(e),f={};\"static\"===l&&(e.style.position=\"relative\"),s=c.offset(),o=S.css(e,\"top\"),u=S.css(e,\"left\"),(\"absolute\"===l||\"fixed\"===l)&&-1<(o+u).indexOf(\"auto\")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),\"using\"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if(\"fixed\"===S.css(r,\"position\"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&\"static\"===S.css(e,\"position\"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,\"borderTopWidth\",!0),i.left+=S.css(e,\"borderLeftWidth\",!0))}return{top:t.top-i.top-S.css(r,\"marginTop\",!0),left:t.left-i.left-S.css(r,\"marginLeft\",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&\"static\"===S.css(e,\"position\"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:\"pageXOffset\",scrollTop:\"pageYOffset\"},function(t,i){var o=\"pageYOffset\"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each([\"top\",\"left\"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+\"px\":t})}),S.each({Height:\"height\",Width:\"width\"},function(a,s){S.each({padding:\"inner\"+a,content:s,\"\":\"outer\"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||\"boolean\"!=typeof e),i=r||(!0===e||!0===t?\"margin\":\"border\");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf(\"outer\")?e[\"inner\"+a]:e.document.documentElement[\"client\"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body[\"scroll\"+a],r[\"scroll\"+a],e.body[\"offset\"+a],r[\"offset\"+a],r[\"client\"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each([\"ajaxStart\",\"ajaxStop\",\"ajaxComplete\",\"ajaxError\",\"ajaxSuccess\",\"ajaxSend\"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 1===arguments.length?this.off(e,\"**\"):this.off(t,e||\"**\",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each(\"blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu\".split(\" \"),function(e,n){S.fn[n]=function(e,t){return 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
2023.6.13\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m2023.6\u001b[0m.\u001b[1;36m13\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import (\n", - " Q, columns, unique_terms)\n", - "import numpy as np\n", - "import pandas as pd\n", - "from itables import init_notebook_mode, show\n", - "init_notebook_mode(all_interactive=True)\n", - "import itables.options as opt\n", - "opt.maxBytes=0\n", - "opt.scrollX=\"200px\"\n", - "opt.scrollCollapse=True\n", - "opt.paging=True\n", - "opt.maxColumns=0\n", - "print(Q.get_version())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d105760b", - "metadata": {}, - "outputs": [], - "source": [ - "query1 = Q('treatment_anatomic_site = \"Cervix\"')\n", - "query2 = Q('primary_diagnosis_site = \"%uter%\" OR primary_diagnosis_site = \"%cerv%\"')\n", - "query3 = Q('primary_diagnosis_condition != \"Adenomas and Adenocarcinomas\"')\n", - "print(query1.OR(query2).AND(query3).to_json())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c17061f6", - "metadata": {}, - "outputs": [], - "source": [ - "print(Q(\"\"\"\n", - "treatment_anatomic_site = \"Cervix\" OR \n", - "primary_diagnosis_site = \"%uter%\" OR primary_diagnosis_site = \"%cerv%\"\n", - "AND primary_diagnosis_condition != \"Adenomas and Adenocarcinomas\"\n", - " \"\"\").to_json())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9698f51c", - "metadata": {}, - "outputs": [], - "source": [ - "columns().to_list(filters=\"TP53\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a7693974-65a0-45d9-a958-38f08cfd0661", - "metadata": { - "scrolled": false, - "tags": [] - }, - "outputs": [], - "source": [ - "unique_terms(\"TP53\", show_counts=True).to_dataframe()" - ] - }, - { - "cell_type": "markdown", - "id": "3d4c94ac", - "metadata": {}, - "source": [ - "##### unique_terms(\"Gene\").to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "c3b4fcc1-cbf8-48bf-8ff0-ccb25a6e318e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m integration_table \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mgdc-bq-sample.dev\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> 4\u001b[0m \u001b[39mprint\u001b[39m(Q(\u001b[39m\"\u001b[39;49m\u001b[39m SYMBOL LIKE \u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mTP53\u001b[39;49m\u001b[39m%\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mspecimen\u001b[39m.\u001b[39;49mrun(show_sql\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,host\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mhttp://localhost:8080\u001b[39;49m\u001b[39m\"\u001b[39;49m,table\u001b[39m=\u001b[39;49mintegration_table))\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/decorators/measure.py:30\u001b[0m, in \u001b[0;36mMeasure.__call__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mkwargs \u001b[39m=\u001b[39m kwargs\n\u001b[1;32m 29\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 30\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 31\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult\n\u001b[1;32m 32\u001b[0m \u001b[39mfinally\u001b[39;00m:\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Q.py:673\u001b[0m, in \u001b[0;36mQ.run\u001b[0;34m(self, offset, limit, limit, version, host, dry_run, table, async_call, verify, verbose, include, format_type, show_sql)\u001b[0m\n\u001b[1;32m 670\u001b[0m dryClass \u001b[39m=\u001b[39m DryClass(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mapi_response\u001b[39m.\u001b[39mto_dict())\n\u001b[1;32m 671\u001b[0m \u001b[39mreturn\u001b[39;00m dryClass\n\u001b[0;32m--> 673\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__get_query_result(\n\u001b[1;32m 674\u001b[0m api_instance\u001b[39m=\u001b[39;49mapi_instance,\n\u001b[1;32m 675\u001b[0m query_id\u001b[39m=\u001b[39;49mapi_response\u001b[39m.\u001b[39;49mquery_id,\n\u001b[1;32m 676\u001b[0m offset\u001b[39m=\u001b[39;49mPAGEOFFSET,\n\u001b[1;32m 677\u001b[0m limit\u001b[39m=\u001b[39;49mlimit,\n\u001b[1;32m 678\u001b[0m async_req\u001b[39m=\u001b[39;49masync_call,\n\u001b[1;32m 679\u001b[0m show_sql\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_show_sql,\n\u001b[1;32m 680\u001b[0m show_count\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[1;32m 681\u001b[0m format_type\u001b[39m=\u001b[39;49mformat_type,\n\u001b[1;32m 682\u001b[0m )\n\u001b[1;32m 683\u001b[0m \u001b[39mexcept\u001b[39;00m ServiceException \u001b[39mas\u001b[39;00m http_error:\n\u001b[1;32m 684\u001b[0m \u001b[39mif\u001b[39;00m verbose:\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Q.py:567\u001b[0m, in \u001b[0;36mQ.__get_query_result\u001b[0;34m(self, api_instance, query_id, offset, limit, async_req, pre_stream, show_sql, show_count, format_type)\u001b[0m\n\u001b[1;32m 564\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(response, ApplyResult):\n\u001b[1;32m 565\u001b[0m response \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mget()\n\u001b[0;32m--> 567\u001b[0m sleep(\u001b[39m2.5\u001b[39;49m)\n\u001b[1;32m 568\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mtotal_row_count \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 569\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_result_object(\n\u001b[1;32m 570\u001b[0m api_response\u001b[39m=\u001b[39mresponse,\n\u001b[1;32m 571\u001b[0m query_id\u001b[39m=\u001b[39mquery_id,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 577\u001b[0m format_type\u001b[39m=\u001b[39mformat_type,\n\u001b[1;32m 578\u001b[0m )\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "integration_table = \"gdc-bq-sample.dev\"\n", - "\n", - "\n", - "print(Q(\" SYMBOL LIKE 'TP53%'\").specimen.run(show_sql=True,host=\"http://localhost:8080\",table=integration_table))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f27ed7b7-3ff5-40ed-9144-874402ccc7d5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df = results.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b33c502f-cf4c-40e6-a2e2-95ff75b50007", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "len(df)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58563df0", - "metadata": {}, - "outputs": [], - "source": [ - "all_entrez" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a5e9d8e", - "metadata": {}, - "outputs": [], - "source": [ - "all_entrez.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ac9c36b-7307-4e4d-a033-90afc461877c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df_ids = all_entrez.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "decc579d", - "metadata": {}, - "outputs": [], - "source": [ - "df_ids" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79fa3900-a1f2-44c0-82be-6781cc895e5e", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "temp = list(df_ids['Entrez_Gene_Id'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf7dc5b6", - "metadata": {}, - "outputs": [], - "source": [ - "temp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f07fb86b-caa3-4d31-81f7-95648720d21d", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "with open('temp.txt', 'w') as outf: outf.write('\\n'.join(map(str, temp)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "68e55217-84a5-4c96-85b1-2a3786d3578a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "tp53 = Q(\"Gene = 'TP53'\")\n", - "results2 = tp53.researchsubject.run()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9cda16d1-1036-4a71-afc9-3a427da3a70a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df2 = results2.to_dataframe()\n", - "df2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e93fe903-0ffd-489a-ad86-3dc98068331c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "len(df2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c861b958-a106-44a6-af8c-e86b391b7ab5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "all_genes = unique_terms(\"Gene\").get_all()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "61a87143-933b-4987-814d-ffa77e6fc713", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df_genes = all_genes.to_dataframe()\n", - "temp2 = df_genes['Gene']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ba9e0490-27fb-4ba2-8ef3-4e82255f70ce", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "with open('temp2.txt', 'w') as outf: outf.write('\\n'.join(map(str, temp2)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "800ab924", - "metadata": {}, - "outputs": [], - "source": [ - "columns().to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e72430b1-c528-42b0-819a-bf9e1a0810d6", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"subject_associated_project\").to_dataframe().to_csv(\"allprojects.tsv\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/.DS_Store b/notebooks/.DS_Store deleted file mode 100644 index 0949ce4dfb647c9ea5f734ba7a35bdd19c8b7529..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKyG{f#44l9fCn2OOC@Av-oJ2_%PDeq_55NKi(!wDg5_I`1J`ZN>fNo)FPXgJJ zJx**V*^xF60BP}jbqLG=OsT>oXGGLJ8ro{f!enFIqsKK4@I?DJL~GyUYF}`HTdc4D z(Z1pVbKJ216?f=P`_*OFuR7LZUc)sYcO@=~IgruM{0 zUe5m9@=C+%=;cT@GskH&e?DH4&CdR8=}5&fY%mZE3>nz==1k}R34fWyOXVS(~ZnREhGc918uxC4dd>Bl{Ngc$Uw+(y%&e6!lv\n", - "Julia is an oncologist that specializes in female reproductive health. As part of her research, she is interested in using existing data on uterine cancers. If possible, she would like to see multiple datatypes (gross imaging, genomic data, proteomic data, histology) that come from the same patient, so she can look for shared phenotypes to test for their potential as early diagnostics. Julia heard that the Cancer Data Aggregator has made it easy to search across multiple datasets created by NCI, and so has decided to start her search there.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "14e45a9b", - "metadata": {}, - "source": [ - "## Getting Started\n", - "\n", - "The CDA provides a custom python tool for searching CDA data. [`Q`](usage/#q) (short for Query) offers several ways to search and filter data, and several input modes:\n", - "\n", - "---\n", - "- **Q.()** builds a query that can be used by `run()` or `count()`\n", - "- **Q.run()** returns data for the specified search \n", - "- **Q.count()** returns summary information (counts) data that fit the specified search\n", - "- **columns()** returns entity field names\n", - "- **unique_terms()** returns entity field contents\n", - "\n", - "---\n", - "\n", - "Before Julia does any work, she needs to import these functions cdapython.\n", - "She'll also need to import [pandas](https://pandas.pydata.org/) to get nice dataframes.\n", - "Finally, she tells cdapython to report it's version so she can be sure she's using the one she means to:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "a5265d4d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
2022.9.15\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m2022.9\u001b[0m.\u001b[1;36m15\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import Q, columns, unique_terms, query\n", - "import cdapython\n", - "import pandas as pd \n", - "print(cdapython.__version__)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "c577d416", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'https://cancerdata.dsde-dev.broadinstitute.org/'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Q.set_default_project_dataset(\"broad-dsde-dev.cda_dev\")\n", - "Q.set_host_url(\"https://cancerdata.dsde-dev.broadinstitute.org/\")\n", - "Q.get_host_url()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "7a4e1b8b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'broad-dsde-dev.cda_dev'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Q.get_default_project_dataset()" - ] - }, - { - "cell_type": "markdown", - "id": "75eef23e", - "metadata": {}, - "source": [ - "
\n", - " \n", - " \n", - "CDA data comes from three sources:\n", - " \n", - " \n", - "The CDA makes this data searchable in four main endpoints:\n", - "\n", - "
    \n", - "
  • subject: A patient entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subjects privacy.
  • \n", - "
  • researchsubject: A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subjects privacy. This entity plays the role of the case_id in existing data. An individual who participates in 3 studies will have 3 researchsubject IDs
  • \n", - "
  • specimen: Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. Specimens are usually collected as an example of their kind, often for use in some investigation.
  • \n", - "
  • file: A unit of data about subjects, researchsubjects, specimens, or their associated information
  • \n", - "
\n", - "and two endpoints that offer deeper information about data in the researchsubject endpoint:\n", - "
    \n", - "
  • diagnosis: A collection of characteristics that describe an abnormal condition of the body as assessed at a point in time. May be used to capture information about neoplastic and non-neoplastic conditions.
  • \n", - "
  • treatment: Represent medication administration or other treatment types.
  • \n", - "
\n", - "Any metadata field can be searched from any endpoint, the only difference between search types is what type of data is returned by default. This means that you can think of the CDA as a really, really enormous spreadsheet full of data. To search this enormous spreadsheet, you'd want select columns, and then filter rows.\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "id": "391bc9a7", - "metadata": {}, - "source": [ - "## Finding Search Terms\n", - "\n", - "\n", - " \n", - " \n", - " Accordingly, to see what search fields are available, Julia starts by using the command `columns`:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ef0dd8e5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n",
-       "    {'AA_MAF': 'Non-reference allele and frequency of existing variant in NHLBI-ESP African American population'},\n",
-       "    {\n",
-       "        'AFR_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined African \n",
-       "population'\n",
-       "    },\n",
-       "    {'ALLELE_NUM': 'Allele number from input; 0 is reference, 1 is first alternate etc.'},\n",
-       "    {\n",
-       "        'AMR_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined American \n",
-       "population'\n",
-       "    },\n",
-       "    {'Allele': 'The variant allele used to calculate the consequence'},\n",
-       "    {\n",
-       "        'Amino_acids': 'Amino acid substitution caused by the mutation. Only given if the variation affects the \n",
-       "protein-coding sequence'\n",
-       "    },\n",
-       "    {'BIOTYPE': 'Biotype of transcript'},\n",
-       "    {\n",
-       "        'CANONICAL': 'A flag (YES) indicating that the VEP-based canonical transcript, the longest translation, was\n",
-       "used for this gene. If not, the value is null'\n",
-       "    },\n",
-       "    {'CCDS': 'The  CCDS identifier for this transcript, where applicable'},\n",
-       "    {\n",
-       "        'CDS_position': 'Relative position of base pair in coding sequence. A - symbol is displayed as the \n",
-       "numerator if the variant does not appear in coding sequence'\n",
-       "    },\n",
-       "    {'CLIN_SIG': 'Clinical significance of variant from dbSNP'},\n",
-       "    {'CONTEXT': 'The reference allele per VCF specs, and its five flanking base pairs'},\n",
-       "    {'COSMIC': 'Overlapping COSMIC variants'},\n",
-       "    {'Center': 'One or more genome sequencing center reporting the variant'},\n",
-       "    {'Chromosome': 'Chromosome, possible values: chr1-22, and chrX'},\n",
-       "    {'Codons': 'The alternative codons with the variant base in upper case'},\n",
-       "    {'Consequence': 'Consequence type of this variant; sequence ontology terms'},\n",
-       "    {'DISTANCE': 'Shortest distance from the variant to transcript'},\n",
-       "    {'DOMAINS': 'The source and identifier of any overlapping protein domains'},\n",
-       "    {\n",
-       "        'EAS_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined East Asian \n",
-       "population'\n",
-       "    },\n",
-       "    {\n",
-       "        'EA_MAF': 'Non-reference allele and frequency of existing variant in NHLBI-ESP European American \n",
-       "population'\n",
-       "    },\n",
-       "    {'ENSP': 'The Ensembl protein identifier of the affected transcript'},\n",
-       "    {\n",
-       "        'EUR_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined European \n",
-       "population'\n",
-       "    },\n",
-       "    {'EXON': 'The exon number (out of total number)'},\n",
-       "    {\n",
-       "        'End_Position': 'Highest numeric genomic position of the reported variant on the genomic reference \n",
-       "sequence. Mutation end coordinate'\n",
-       "    },\n",
-       "    {\n",
-       "        'Entrez_Gene_Id': 'Entrez gene ID (an integer). 0 is used for regions that do not correspond to a gene \n",
-       "region or Ensembl ID'\n",
-       "    },\n",
-       "    {'ExAC_AF': 'Global Allele Frequency from   ExAC'},\n",
-       "    {'ExAC_AF_AFR': 'African/African American Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_AMR': 'American Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_Adj': 'Adjusted Global Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_EAS': 'East Asian Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_FIN': 'Finnish Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_NFE': 'Non-Finnish European Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_OTH': 'Other Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_SAS': 'South Asian Allele Frequency from ExAC'},\n",
-       "    {'Existing_variation': 'Known identifier of existing variation'},\n",
-       "    {'Exon_Number': 'The exon number (out of total number)'},\n",
-       "    {\n",
-       "        'FILTER': 'Copied from input VCF. This includes filters implemented directly by the variant caller and \n",
-       "other external software used in the DNA-Seq pipeline. See below for additional details.'\n",
-       "    },\n",
-       "    {'Feature': 'Stable Ensembl ID of feature (transcript, regulatory, motif)'},\n",
-       "    {'Feature_type': 'Type of feature. Currently one of Transcript, RegulatoryFeature, MotifFeature (or blank)'},\n",
-       "    {'GDC_FILTER': 'GDC filters applied universally across all MAFs'},\n",
-       "    {\n",
-       "        'GDC_Validation_Status': 'GDC implementation of validation checks. See notes section (#5) below for \n",
-       "details'\n",
-       "    },\n",
-       "    {'GMAF': 'Non-reference allele and frequency of existing variant in   1000 Genomes'},\n",
-       "    {'Gene': 'The gene symbol. In this table, gene symbol is gene name e.g. ACADVL'},\n",
-       "    {'HGNC_ID': 'Gene identifier from the HUGO Gene Nomenclature Committee if applicable'},\n",
-       "    {'HGVS_OFFSET': 'Indicates by how many bases the HGVS notations for this variant have been shifted'},\n",
-       "    {'HGVSc': 'The coding sequence of the variant in HGVS recommended format'},\n",
-       "    {\n",
-       "        'HGVSp': 'The protein sequence of the variant in HGVS recommended format. p.= signifies no change in the \n",
-       "protein'\n",
-       "    },\n",
-       "    {'HGVSp_Short': 'Same as the HGVSp column, but using 1-letter amino-acid codes'},\n",
-       "    {\n",
-       "        'Hugo_Symbol': 'HUGO symbol for the gene (HUGO symbols are always in all caps). Unknown is used for regions\n",
-       "that do not correspond to a gene'\n",
-       "    },\n",
-       "    {'IMPACT': 'The impact modifier for the consequence type'},\n",
-       "    {'INTRON': 'The intron number (out of total number)'},\n",
-       "    {'MC3_Overlap': 'Indicates whether this region overlaps with an MC3 variant for the same sample pair'},\n",
-       "    {\n",
-       "        'MINIMISED': 'Alleles in this variant have been converted to minimal representation before consequence \n",
-       "calculation (1 or null)'\n",
-       "    },\n",
-       "    {'Matched_Norm_Sample_UUID': 'Unique GDC identifier for normal aliquot (10189 unique)'},\n",
-       "    {\n",
-       "        'Mutation_Status': 'An assessment of the mutation as somatic, germline, LOH, post transcriptional \n",
-       "modification, unknown, or none. The values allowed in this field are constrained by the value in the \n",
-       "Validation_Status field'\n",
-       "    },\n",
-       "    {'NCBI_Build': 'The reference genome used for the alignment (GRCh38)'},\n",
-       "    {\n",
-       "        'One_Consequence': 'The single consequence of the canonical transcript in  sequence ontology terms, eg \n",
-       "missense_variant'\n",
-       "    },\n",
-       "    {'PHENO': 'Indicates if existing variant is associated with a phenotype, disease or trait (0, 1, or null)'},\n",
-       "    {'PICK': \"Indicates if this block of consequence data was picked by VEP's   pick feature (1 or null)\"},\n",
-       "    {'PUBMED': 'Pubmed ID(s) of publications that cite existing variant'},\n",
-       "    {'PolyPhen': 'The PolyPhen prediction and/or score'},\n",
-       "    {\n",
-       "        'Protein_position': 'Relative position of affected amino acid in protein. A - symbol is displayed as the \n",
-       "numerator if the variant does not appear in coding sequence'\n",
-       "    },\n",
-       "    {'RefSeq': 'RefSeq identifier for this transcript'},\n",
-       "    {\n",
-       "        'Reference_Allele': 'The plus strand reference allele at this position. Includes the deleted sequence for a\n",
-       "deletion or - for an insertion'\n",
-       "    },\n",
-       "    {\n",
-       "        'SAS_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined South Asian \n",
-       "population'\n",
-       "    },\n",
-       "    {'SIFT': 'The   SIFT prediction and/or score, with both given as prediction (score)'},\n",
-       "    {'SOMATIC': 'Somatic status of each ID reported under Existing_variation (0, 1, or null)'},\n",
-       "    {'SWISSPROT': 'UniProtKB/Swiss-Prot accession'},\n",
-       "    {'SYMBOL': 'Eg TP53, LRP1B, etc (same as Hugo_Symbol field except blank instead of Unknown'},\n",
-       "    {\n",
-       "        'SYMBOL_SOURCE': 'The source of the gene symbol, usually HGNC, rarely blank, other sources include \n",
-       "Uniprot_gn, EntrezGene, etc'\n",
-       "    },\n",
-       "    {'Sequencer': 'Instrument used to produce primary sequence data'},\n",
-       "    {\n",
-       "        'Start_Position': 'Lowest numeric position of the reported variant on the genomic reference sequence. \n",
-       "Mutation start coordinate'\n",
-       "    },\n",
-       "    {'Strand': 'Either + or - to denote whether read mapped to the sense (+) or anti-sense (-) strand'},\n",
-       "    {'TRANSCRIPT_STRAND': 'The DNA strand (1 or -1) on which the transcript/feature lies'},\n",
-       "    {'TREMBL': 'UniProtKB/TrEMBL identifier of protein product'},\n",
-       "    {'TSL': 'Transcript support level, which is based on independent RNA analyses'},\n",
-       "    {'Transcript_ID': 'Ensembl ID of the transcript affected by the variant'},\n",
-       "    {'Tumor_Sample_UUID': 'Unique GDC identifier for tumor aliquot (10189 unique)'},\n",
-       "    {\n",
-       "        'Tumor_Seq_Allele1': 'Primary data genotype for tumor sequencing (discovery) allele 1. A - symbol for a \n",
-       "deletion represents a variant. A - symbol for an insertion represents wild-type allele. Novel inserted sequence for\n",
-       "insertion does not include flanking reference bases'\n",
-       "    },\n",
-       "    {\n",
-       "        'Tumor_Seq_Allele2': 'Primary data genotype for tumor sequencing (discovery) allele 2. A - symbol for a \n",
-       "deletion represents a variant. A - symbol for an insertion represents wild-type allele. Novel inserted sequence for\n",
-       "insertion does not include flanking reference bases'\n",
-       "    },\n",
-       "    {\n",
-       "        'Tumor_Validation_Allele1': 'Secondary data from orthogonal technology. Tumor genotyping (validation) for \n",
-       "allele 1. A - symbol for a deletion represents a variant. A - symbol for an insertion represents wild-type allele. \n",
-       "Novel inserted sequence for insertion does not include flanking reference bases'\n",
-       "    },\n",
-       "    {\n",
-       "        'Tumor_Validation_Allele2': 'Secondary data from orthogonal technology. Tumor genotyping (validation) for \n",
-       "allele 2'\n",
-       "    },\n",
-       "    {'UNIPARC': 'UniParc identifier of protein product'},\n",
-       "    {'VARIANT_CLASS': 'Sequence Ontology variant class'},\n",
-       "    {'Validation_Method': 'The assay platforms used for the validation call'},\n",
-       "    {'Variant_Classification': 'Translational effect of variant allele'},\n",
-       "    {\n",
-       "        'Variant_Type': 'Type of mutation. TNP (tri-nucleotide polymorphism) is analogous to DNP (di-nucleotide \n",
-       "polymorphism) but for three consecutive nucleotides. ONP (oligo-nucleotide polymorphism) is analogous to TNP but \n",
-       "for consecutive runs of four or more (SNP, DNP, TNP, ONP, INS, DEL, or Consolidated)'\n",
-       "    },\n",
-       "    {'age_at_diagnosis': 'The age in days of the individual at the time of diagnosis'},\n",
-       "    {'aliquot_barcode_normal': 'TCGA aliquot barcode for the normal control, eg TCGA-12-1089-01A-01D-0517-01'},\n",
-       "    {'aliquot_barcode_tumor': 'TCGA aliquot barcode for the tumor, eg TCGA-12-1089-01A-01D-0517-01'},\n",
-       "    {\n",
-       "        'all_effects': 'A semicolon delimited list of all possible variant effects, sorted by priority \n",
-       "([Symbol,Consequence,HGVSp_Short,Transcript_ID,RefSeq,HGVSc,Impact,Canonical,Sift,PolyPhen,Strand])'\n",
-       "    },\n",
-       "    {\n",
-       "        'anatomical_site': 'Per GDC Dictionary, the text term that represents the name of the primary disease site \n",
-       "of the submitted tumor sample; recommend dropping tumor; biospecimen_anatomic_site.'\n",
-       "    },\n",
-       "    {'byte_size': 'Size of the file in bytes. Maps to dcat:byteSize.'},\n",
-       "    {\n",
-       "        'cDNA_position': 'Relative position of base pair in the cDNA sequence as a fraction. A - symbol is \n",
-       "displayed as the numerator if the variant does not appear in cDNA'\n",
-       "    },\n",
-       "    {\n",
-       "        'callerName': '|-delimited list of mutation caller(s) that agreed on this particular call, always in \n",
-       "alphabetical order: muse, mutect, somaticsniper, varscan'\n",
-       "    },\n",
-       "    {'case_barcode': 'Original TCGA case barcode, eg TCGA-DX-A8BN'},\n",
-       "    {'case_id': 'Unique GDC identifier for the underlying case'},\n",
-       "    {\n",
-       "        'cause_of_death': 'Coded value indicating the circumstance or condition that results in the death of the \n",
-       "subject.'\n",
-       "    },\n",
-       "    {\n",
-       "        'checksum': 'A digit representing the sum of the correct digits in a piece of stored or transmitted digital\n",
-       "data, against which later comparisons can be made to detect errors in the data.'\n",
-       "    },\n",
-       "    {'data_category': 'Broad categorization of the contents of the data file.'},\n",
-       "    {\n",
-       "        'data_modality': 'Data modality describes the biological nature of the information gathered as the result \n",
-       "of an Activity, independent of the technology or methods used to produce the information.'\n",
-       "    },\n",
-       "    {'data_type': 'Specific content type of the data file.'},\n",
-       "    {\n",
-       "        'days_to_birth': \"Number of days between the date used for index and the date from a person's date of birth\n",
-       "represented as a calculated negative number of days.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'days_to_collection': 'The number of days from the index date to either the date a sample was collected for\n",
-       "a specific study or project, or the date a patient underwent a procedure (e.g. surgical resection) yielding a \n",
-       "sample that was eventually used for research.'\n",
-       "    },\n",
-       "    {\n",
-       "        'days_to_death': \"Number of days between the date used for index and the date from a person's date of death\n",
-       "represented as a calculated number of days.\"\n",
-       "    },\n",
-       "    {'days_to_treatment_end': ' The timepoint at which the treatment ended.'},\n",
-       "    {'days_to_treatment_start': 'The timepoint at which the treatment started.'},\n",
-       "    {\n",
-       "        'dbSNP_RS': 'The rs-IDs from the   dbSNP database, novel if not found in any database used, or null if \n",
-       "there is no dbSNP record, but it is found in other databases'\n",
-       "    },\n",
-       "    {\n",
-       "        'dbSNP_Val_Status': 'The dbSNP validation status is reported as a semicolon-separated list of statuses. The\n",
-       "union of all rs-IDs is taken when there are multiple'\n",
-       "    },\n",
-       "    {'dbgap_accession_number': 'The dbgap accession number for the project.'},\n",
-       "    {'derived_from_specimen': 'A source/parent specimen from which this one was directly derived.'},\n",
-       "    {\n",
-       "        'derived_from_subject': 'The Patient/ResearchSubject, or Biologically Derived Materal (e.g. a cell line, \n",
-       "tissue culture, organoid) from which the specimen was directly or indirectly derived.'\n",
-       "    },\n",
-       "    {\n",
-       "        'diagnosis_id': \"The 'logical' identifier of the entity in the repository, e.g. a UUID.  This 'id' is \n",
-       "unique within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'diagnosis_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'diagnosis_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {'drs_uri': 'A string of characters used to identify a resource on the Data Repo Service(DRS).'},\n",
-       "    {\n",
-       "        'ethnicity': \"An individual's self-described social and cultural grouping, specifically whether an \n",
-       "individual describes themselves as Hispanic or Latino. The provided values are based on the categories defined by \n",
-       "the U.S. Office of Management and Business and used by the U.S. Census Bureau.\"\n",
-       "    },\n",
-       "    {'fileName': '|-delimited list of name of underlying MAF file'},\n",
-       "    {'fileUUID': '|-delimited list of unique GDC identifiers for underlying MAF file'},\n",
-       "    {\n",
-       "        'file_associated_project': 'A reference to the Project(s) of which this ResearchSubject is a member. The \n",
-       "associated_project may be embedded using the $ref definition or may be a reference to the id for the Project - or a\n",
-       "URI expressed as a string to an existing entity.'\n",
-       "    },\n",
-       "    {'file_format': 'Format of the data files.'},\n",
-       "    {\n",
-       "        'file_id': \"The 'logical' identifier of the entity in the repository, e.g. a UUID.  This 'id' is unique \n",
-       "within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'file_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'file_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {\n",
-       "        'grade': 'The degree of abnormality of cancer cells, a measure of differentiation, the extent to which \n",
-       "cancer cells are similar in appearance and function to healthy cells of the same tissue type. The degree of \n",
-       "differentiation often relates to the clinical behavior of the particular tumor. Based on the microscopic findings, \n",
-       "tumor grade is commonly described by one of four degrees of severity. Histopathologic grade of a tumor may be used \n",
-       "to plan treatment and estimate the future course, outcome, and overall prognosis of disease. Certain types of \n",
-       "cancers, such as soft tissue sarcoma, primary brain tumors, lymphomas, and breast have special grading systems.'\n",
-       "    },\n",
-       "    {\n",
-       "        'imaging_modality': 'An imaging modality describes the imaging equipment and/or method used to acquire \n",
-       "certain structural or functional information about the body. These include but are not limited to computed \n",
-       "tomography (CT) and magnetic resonance imaging (MRI). Taken from the DICOM standard.'\n",
-       "    },\n",
-       "    {\n",
-       "        'imaging_series': \"The 'logical' identifier of the series or grouping of imaging files in the system of \n",
-       "record which the file is a part of.\"\n",
-       "    },\n",
-       "    {'label': 'Short name or abbreviation for dataset. Maps to rdfs:label.'},\n",
-       "    {'member_of_research_project': 'A reference to the Study(s) of which this ResearchSubject is a member.'},\n",
-       "    {'method_of_diagnosis': 'The method used to confirm the patients malignant diagnosis'},\n",
-       "    {\n",
-       "        'morphology': 'Code that represents the histology of the disease using the third edition of the \n",
-       "International Classification of Diseases for Oncology, published in 2000, used principally in tumor and cancer \n",
-       "registri'\n",
-       "    },\n",
-       "    {'n_depth': 'Read depth across this locus in normal BAM'},\n",
-       "    {'normal_bam_uuid': 'Unique GDC identifier for the underlying normal bam file'},\n",
-       "    {'number_of_cycles': 'The number of treatment cycles the subject received.'},\n",
-       "    {'primary_diagnosis': 'The diagnosis instance that qualified a subject for inclusion on a ResearchProject'},\n",
-       "    {\n",
-       "        'primary_diagnosis_condition': \"The text term used to describe the type of malignant disease, as \n",
-       "categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O).\n",
-       "This attribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'primary_diagnosis_site': \"The text term used to describe the primary site of disease, as categorized by \n",
-       "the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This \n",
-       "categorization groups cases into general categories.  This attribute represents the primary site of disease that \n",
-       "qualified the subject for inclusion on the ResearchProject.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'primary_disease_type': \"The text term used to describe the type of malignant disease, as categorized by \n",
-       "the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O).   This \n",
-       "attribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'project_short_name': 'Project name abbreviation; the program name appended with a project name \n",
-       "abbreviation; eg. TCGA-OV, etc.'\n",
-       "    },\n",
-       "    {\n",
-       "        'race': 'An arbitrary classification of a taxonomic group that is a division of a species. It usually \n",
-       "arises as a consequence of geographical isolation within a species and is characterized by shared heredity, \n",
-       "physical attributes and behavior, and in the case of humans, by common history, nationality, or geographic \n",
-       "distribution. The provided values are based on the categories defined by the U.S. Office of Management and Business\n",
-       "and used by the U.S. Census Bureau.'\n",
-       "    },\n",
-       "    {'researchsubject_Files': 'List of ids of File entities associated with the ResearchSubject'},\n",
-       "    {\n",
-       "        'researchsubject_id': \"The 'logical' identifier of the entity in the system of record, e.g. a UUID.  This \n",
-       "'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For \n",
-       "CDA, this is case_id.\"\n",
-       "    },\n",
-       "    {'researchsubject_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'researchsubject_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {\n",
-       "        'sample_barcode_normal': 'TCGA sample barcode for the normal control, eg TCGA-12-1089-01A. One sample may \n",
-       "have multiple sets of CN segmentations corresponding to multiple aliquots; use GROUP BY appropriately in queries'\n",
-       "    },\n",
-       "    {\n",
-       "        'sample_barcode_tumor': 'TCGA sample barcode for the tumor, eg TCGA-12-1089-01A. One sample may have \n",
-       "multiple sets of CN segmentations corresponding to multiple aliquots; use GROUP BY appropriately in queries'\n",
-       "    },\n",
-       "    {\n",
-       "        'sex': \"The biologic character or quality that distinguishes male and female from one another as expressed \n",
-       "by analysis of the person's gonadal, morphologic (internal and external), chromosomal, and hormonal \n",
-       "characteristics.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'source_material_type': 'The general kind of material from which the specimen was derived, indicating the \n",
-       "physical nature of the source material. '\n",
-       "    },\n",
-       "    {\n",
-       "        'species': 'The taxonomic group (e.g. species) of the patient. For MVP, since taxonomy vocabulary is \n",
-       "consistent between GDC and PDC, using text.  Ultimately, this will be a term returned by the vocabulary service.'\n",
-       "    },\n",
-       "    {'specimen_Files': 'List of ids of File entities associated with the Specimen'},\n",
-       "    {'specimen_associated_project': 'The Project associated with the specimen.'},\n",
-       "    {\n",
-       "        'specimen_id': \"The 'logical' identifier of the entity in the system of record, e.g. a UUID.  This 'id' is \n",
-       "unique within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'specimen_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'specimen_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {\n",
-       "        'specimen_type': 'The high-level type of the specimen, based on its how it has been derived from the \n",
-       "original extracted sample. \\n'\n",
-       "    },\n",
-       "    {'src_vcf_id': '|-delimited list of GDC VCF file identifiers'},\n",
-       "    {\n",
-       "        'stage': 'The extent of a cancer in the body. Staging is usually based on the size of the tumor, whether \n",
-       "lymph nodes contain cancer, and whether the cancer has spread from the original site to other parts of the body.'\n",
-       "    },\n",
-       "    {'subject_Files': 'List of ids of File entities associated with the Patient'},\n",
-       "    {'subject_associated_project': 'The list of Projects associated with the Subject.'},\n",
-       "    {\n",
-       "        'subject_id': \"The 'logical' identifier of the entity in the system of record, e.g. a UUID.  This 'id' is \n",
-       "unique within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'subject_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'subject_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {'t_alt_count': 'Read depth supporting the variant allele in tumor BAM'},\n",
-       "    {'t_depth': 'Read depth across this locus in tumor BAM'},\n",
-       "    {'t_ref_count': 'Read depth supporting the reference allele in tumor BAM'},\n",
-       "    {'therapeutic_agent': 'One or more therapeutic agents as part of this treatment.'},\n",
-       "    {'treatment_anatomic_site': 'The anatomical site that the treatment targets.'},\n",
-       "    {'treatment_effect': 'The effect of a treatment on the diagnosis or tumor.'},\n",
-       "    {'treatment_end_reason': 'The reason the treatment ended.'},\n",
-       "    {\n",
-       "        'treatment_id': \"The 'logical' identifier of the entity in the repository, e.g. a UUID.  This 'id' is \n",
-       "unique within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'treatment_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'treatment_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {'treatment_outcome': 'The final outcome of the treatment.'},\n",
-       "    {'treatment_type': 'The treatment type including medication/therapeutics or other procedures.'},\n",
-       "    {'tumor_bam_uuid': 'Unique GDC identifier for the underlying bam file'},\n",
-       "    {\n",
-       "        'vital_status': 'Coded value indicating the state or condition of being living or deceased; also includes \n",
-       "the case where the vital status is unknown.'\n",
-       "    }\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - " \u001b[1m{\u001b[0m\u001b[32m'AA_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in NHLBI-ESP African American population'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'AFR_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined African \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ALLELE_NUM'\u001b[0m: \u001b[32m'Allele number from input; 0 is reference, 1 is first alternate etc.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'AMR_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined American \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Allele'\u001b[0m: \u001b[32m'The variant allele used to calculate the consequence'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Amino_acids'\u001b[0m: \u001b[32m'Amino acid substitution caused by the mutation. Only given if the variation affects the \u001b[0m\n", - "\u001b[32mprotein-coding sequence'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'BIOTYPE'\u001b[0m: \u001b[32m'Biotype of transcript'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'CANONICAL'\u001b[0m: \u001b[32m'A flag \u001b[0m\u001b[32m(\u001b[0m\u001b[32mYES\u001b[0m\u001b[32m)\u001b[0m\u001b[32m indicating that the VEP-based canonical transcript, the longest translation, was\u001b[0m\n", - "\u001b[32mused for this gene. If not, the value is null'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'CCDS'\u001b[0m: \u001b[32m'The CCDS identifier for this transcript, where applicable'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'CDS_position'\u001b[0m: \u001b[32m'Relative position of base pair in coding sequence. A - symbol is displayed as the \u001b[0m\n", - "\u001b[32mnumerator if the variant does not appear in coding sequence'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'CLIN_SIG'\u001b[0m: \u001b[32m'Clinical significance of variant from dbSNP'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'CONTEXT'\u001b[0m: \u001b[32m'The reference allele per VCF specs, and its five flanking base pairs'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'COSMIC'\u001b[0m: \u001b[32m'Overlapping COSMIC variants'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Center'\u001b[0m: \u001b[32m'One or more genome sequencing center reporting the variant'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Chromosome'\u001b[0m: \u001b[32m'Chromosome, possible values: chr1-22, and chrX'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Codons'\u001b[0m: \u001b[32m'The alternative codons with the variant base in upper case'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Consequence'\u001b[0m: \u001b[32m'Consequence type of this variant; sequence ontology terms'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'DISTANCE'\u001b[0m: \u001b[32m'Shortest distance from the variant to transcript'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'DOMAINS'\u001b[0m: \u001b[32m'The source and identifier of any overlapping protein domains'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'EAS_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined East Asian \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'EA_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in NHLBI-ESP European American \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ENSP'\u001b[0m: \u001b[32m'The Ensembl protein identifier of the affected transcript'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'EUR_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined European \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'EXON'\u001b[0m: \u001b[32m'The exon number \u001b[0m\u001b[32m(\u001b[0m\u001b[32mout of total number\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'End_Position'\u001b[0m: \u001b[32m'Highest numeric genomic position of the reported variant on the genomic reference \u001b[0m\n", - "\u001b[32msequence. Mutation end coordinate'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Entrez_Gene_Id'\u001b[0m: \u001b[32m'Entrez gene ID \u001b[0m\u001b[32m(\u001b[0m\u001b[32man integer\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. 0 is used for regions that do not correspond to a gene \u001b[0m\n", - "\u001b[32mregion or Ensembl ID'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF'\u001b[0m: \u001b[32m'Global Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_AFR'\u001b[0m: \u001b[32m'African/African American Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_AMR'\u001b[0m: \u001b[32m'American Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_Adj'\u001b[0m: \u001b[32m'Adjusted Global Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_EAS'\u001b[0m: \u001b[32m'East Asian Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_FIN'\u001b[0m: \u001b[32m'Finnish Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_NFE'\u001b[0m: \u001b[32m'Non-Finnish European Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_OTH'\u001b[0m: \u001b[32m'Other Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_SAS'\u001b[0m: \u001b[32m'South Asian Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Existing_variation'\u001b[0m: \u001b[32m'Known identifier of existing variation'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Exon_Number'\u001b[0m: \u001b[32m'The exon number \u001b[0m\u001b[32m(\u001b[0m\u001b[32mout of total number\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'FILTER'\u001b[0m: \u001b[32m'Copied from input VCF. This includes filters implemented directly by the variant caller and \u001b[0m\n", - "\u001b[32mother external software used in the DNA-Seq pipeline. See below for additional details.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Feature'\u001b[0m: \u001b[32m'Stable Ensembl ID of feature \u001b[0m\u001b[32m(\u001b[0m\u001b[32mtranscript, regulatory, motif\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Feature_type'\u001b[0m: \u001b[32m'Type of feature. Currently one of Transcript, RegulatoryFeature, MotifFeature \u001b[0m\u001b[32m(\u001b[0m\u001b[32mor blank\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'GDC_FILTER'\u001b[0m: \u001b[32m'GDC filters applied universally across all MAFs'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'GDC_Validation_Status'\u001b[0m: \u001b[32m'GDC implementation of validation checks. See notes section \u001b[0m\u001b[32m(\u001b[0m\u001b[32m#5\u001b[0m\u001b[32m)\u001b[0m\u001b[32m below for \u001b[0m\n", - "\u001b[32mdetails'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'GMAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Gene'\u001b[0m: \u001b[32m'The gene symbol. In this table, gene symbol is gene name e.g. ACADVL'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'HGNC_ID'\u001b[0m: \u001b[32m'Gene identifier from the HUGO Gene Nomenclature Committee if applicable'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'HGVS_OFFSET'\u001b[0m: \u001b[32m'Indicates by how many bases the HGVS notations for this variant have been shifted'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'HGVSc'\u001b[0m: \u001b[32m'The coding sequence of the variant in HGVS recommended format'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'HGVSp'\u001b[0m: \u001b[32m'The protein sequence of the variant in HGVS recommended format. p.= signifies no change in the \u001b[0m\n", - "\u001b[32mprotein'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'HGVSp_Short'\u001b[0m: \u001b[32m'Same as the HGVSp column, but using 1-letter amino-acid codes'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Hugo_Symbol'\u001b[0m: \u001b[32m'HUGO symbol for the gene \u001b[0m\u001b[32m(\u001b[0m\u001b[32mHUGO symbols are always in all caps\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Unknown is used for regions\u001b[0m\n", - "\u001b[32mthat do not correspond to a gene'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'IMPACT'\u001b[0m: \u001b[32m'The impact modifier for the consequence type'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'INTRON'\u001b[0m: \u001b[32m'The intron number \u001b[0m\u001b[32m(\u001b[0m\u001b[32mout of total number\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'MC3_Overlap'\u001b[0m: \u001b[32m'Indicates whether this region overlaps with an MC3 variant for the same sample pair'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'MINIMISED'\u001b[0m: \u001b[32m'Alleles in this variant have been converted to minimal representation before consequence \u001b[0m\n", - "\u001b[32mcalculation \u001b[0m\u001b[32m(\u001b[0m\u001b[32m1 or null\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Matched_Norm_Sample_UUID'\u001b[0m: \u001b[32m'Unique GDC identifier for normal aliquot \u001b[0m\u001b[32m(\u001b[0m\u001b[32m10189 unique\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Mutation_Status'\u001b[0m: \u001b[32m'An assessment of the mutation as somatic, germline, LOH, post transcriptional \u001b[0m\n", - "\u001b[32mmodification, unknown, or none. The values allowed in this field are constrained by the value in the \u001b[0m\n", - "\u001b[32mValidation_Status field'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'NCBI_Build'\u001b[0m: \u001b[32m'The reference genome used for the alignment \u001b[0m\u001b[32m(\u001b[0m\u001b[32mGRCh38\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'One_Consequence'\u001b[0m: \u001b[32m'The single consequence of the canonical transcript in sequence ontology terms, eg \u001b[0m\n", - "\u001b[32mmissense_variant'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'PHENO'\u001b[0m: \u001b[32m'Indicates if existing variant is associated with a phenotype, disease or trait \u001b[0m\u001b[32m(\u001b[0m\u001b[32m0, 1, or null\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'PICK'\u001b[0m: \u001b[32m\"Indicates if this block of consequence data was picked by VEP's pick feature \u001b[0m\u001b[32m(\u001b[0m\u001b[32m1 or null\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\"\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'PUBMED'\u001b[0m: \u001b[32m'Pubmed ID\u001b[0m\u001b[32m(\u001b[0m\u001b[32ms\u001b[0m\u001b[32m)\u001b[0m\u001b[32m of publications that cite existing variant'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'PolyPhen'\u001b[0m: \u001b[32m'The PolyPhen prediction and/or score'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Protein_position'\u001b[0m: \u001b[32m'Relative position of affected amino acid in protein. A - symbol is displayed as the \u001b[0m\n", - "\u001b[32mnumerator if the variant does not appear in coding sequence'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'RefSeq'\u001b[0m: \u001b[32m'RefSeq identifier for this transcript'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Reference_Allele'\u001b[0m: \u001b[32m'The plus strand reference allele at this position. Includes the deleted sequence for a\u001b[0m\n", - "\u001b[32mdeletion or - for an insertion'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'SAS_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined South Asian \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'SIFT'\u001b[0m: \u001b[32m'The SIFT prediction and/or score, with both given as prediction \u001b[0m\u001b[32m(\u001b[0m\u001b[32mscore\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'SOMATIC'\u001b[0m: \u001b[32m'Somatic status of each ID reported under Existing_variation \u001b[0m\u001b[32m(\u001b[0m\u001b[32m0, 1, or null\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'SWISSPROT'\u001b[0m: \u001b[32m'UniProtKB/Swiss-Prot accession'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'SYMBOL'\u001b[0m: \u001b[32m'Eg TP53, LRP1B, etc \u001b[0m\u001b[32m(\u001b[0m\u001b[32msame as Hugo_Symbol field except blank instead of Unknown'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'SYMBOL_SOURCE'\u001b[0m: \u001b[32m'The source of the gene symbol, usually HGNC, rarely blank, other sources include \u001b[0m\n", - "\u001b[32mUniprot_gn, EntrezGene, etc'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Sequencer'\u001b[0m: \u001b[32m'Instrument used to produce primary sequence data'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Start_Position'\u001b[0m: \u001b[32m'Lowest numeric position of the reported variant on the genomic reference sequence. \u001b[0m\n", - "\u001b[32mMutation start coordinate'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Strand'\u001b[0m: \u001b[32m'Either + or - to denote whether read mapped to the sense \u001b[0m\u001b[32m(\u001b[0m\u001b[32m+\u001b[0m\u001b[32m)\u001b[0m\u001b[32m or anti-sense \u001b[0m\u001b[32m(\u001b[0m\u001b[32m-\u001b[0m\u001b[32m)\u001b[0m\u001b[32m strand'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'TRANSCRIPT_STRAND'\u001b[0m: \u001b[32m'The DNA strand \u001b[0m\u001b[32m(\u001b[0m\u001b[32m1 or -1\u001b[0m\u001b[32m)\u001b[0m\u001b[32m on which the transcript/feature lies'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'TREMBL'\u001b[0m: \u001b[32m'UniProtKB/TrEMBL identifier of protein product'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'TSL'\u001b[0m: \u001b[32m'Transcript support level, which is based on independent RNA analyses'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Transcript_ID'\u001b[0m: \u001b[32m'Ensembl ID of the transcript affected by the variant'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Tumor_Sample_UUID'\u001b[0m: \u001b[32m'Unique GDC identifier for tumor aliquot \u001b[0m\u001b[32m(\u001b[0m\u001b[32m10189 unique\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Tumor_Seq_Allele1'\u001b[0m: \u001b[32m'Primary data genotype for tumor sequencing \u001b[0m\u001b[32m(\u001b[0m\u001b[32mdiscovery\u001b[0m\u001b[32m)\u001b[0m\u001b[32m allele 1. A - symbol for a \u001b[0m\n", - "\u001b[32mdeletion represents a variant. A - symbol for an insertion represents wild-type allele. Novel inserted sequence for\u001b[0m\n", - "\u001b[32minsertion does not include flanking reference bases'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Tumor_Seq_Allele2'\u001b[0m: \u001b[32m'Primary data genotype for tumor sequencing \u001b[0m\u001b[32m(\u001b[0m\u001b[32mdiscovery\u001b[0m\u001b[32m)\u001b[0m\u001b[32m allele 2. A - symbol for a \u001b[0m\n", - "\u001b[32mdeletion represents a variant. A - symbol for an insertion represents wild-type allele. Novel inserted sequence for\u001b[0m\n", - "\u001b[32minsertion does not include flanking reference bases'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Tumor_Validation_Allele1'\u001b[0m: \u001b[32m'Secondary data from orthogonal technology. Tumor genotyping \u001b[0m\u001b[32m(\u001b[0m\u001b[32mvalidation\u001b[0m\u001b[32m)\u001b[0m\u001b[32m for \u001b[0m\n", - "\u001b[32mallele 1. A - symbol for a deletion represents a variant. A - symbol for an insertion represents wild-type allele. \u001b[0m\n", - "\u001b[32mNovel inserted sequence for insertion does not include flanking reference bases'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Tumor_Validation_Allele2'\u001b[0m: \u001b[32m'Secondary data from orthogonal technology. Tumor genotyping \u001b[0m\u001b[32m(\u001b[0m\u001b[32mvalidation\u001b[0m\u001b[32m)\u001b[0m\u001b[32m for \u001b[0m\n", - "\u001b[32mallele 2'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'UNIPARC'\u001b[0m: \u001b[32m'UniParc identifier of protein product'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'VARIANT_CLASS'\u001b[0m: \u001b[32m'Sequence Ontology variant class'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Validation_Method'\u001b[0m: \u001b[32m'The assay platforms used for the validation call'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Variant_Classification'\u001b[0m: \u001b[32m'Translational effect of variant allele'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Variant_Type'\u001b[0m: \u001b[32m'Type of mutation. TNP \u001b[0m\u001b[32m(\u001b[0m\u001b[32mtri-nucleotide polymorphism\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is analogous to DNP \u001b[0m\u001b[32m(\u001b[0m\u001b[32mdi-nucleotide \u001b[0m\n", - "\u001b[32mpolymorphism\u001b[0m\u001b[32m)\u001b[0m\u001b[32m but for three consecutive nucleotides. ONP \u001b[0m\u001b[32m(\u001b[0m\u001b[32moligo-nucleotide polymorphism\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is analogous to TNP but \u001b[0m\n", - "\u001b[32mfor consecutive runs of four or more \u001b[0m\u001b[32m(\u001b[0m\u001b[32mSNP, DNP, TNP, ONP, INS, DEL, or Consolidated\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'age_at_diagnosis'\u001b[0m: \u001b[32m'The age in days of the individual at the time of diagnosis'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'aliquot_barcode_normal'\u001b[0m: \u001b[32m'TCGA aliquot barcode for the normal control, eg TCGA-12-1089-01A-01D-0517-01'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'aliquot_barcode_tumor'\u001b[0m: \u001b[32m'TCGA aliquot barcode for the tumor, eg TCGA-12-1089-01A-01D-0517-01'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'all_effects'\u001b[0m: \u001b[32m'A semicolon delimited list of all possible variant effects, sorted by priority \u001b[0m\n", - "\u001b[32m(\u001b[0m\u001b[32m[\u001b[0m\u001b[32mSymbol,Consequence,HGVSp_Short,Transcript_ID,RefSeq,HGVSc,Impact,Canonical,Sift,PolyPhen,Strand\u001b[0m\u001b[32m]\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'anatomical_site'\u001b[0m: \u001b[32m'Per GDC Dictionary, the text term that represents the name of the primary disease site \u001b[0m\n", - "\u001b[32mof the submitted tumor sample; recommend dropping tumor; biospecimen_anatomic_site.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'byte_size'\u001b[0m: \u001b[32m'Size of the file in bytes. Maps to dcat:byteSize.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'cDNA_position'\u001b[0m: \u001b[32m'Relative position of base pair in the cDNA sequence as a fraction. A - symbol is \u001b[0m\n", - "\u001b[32mdisplayed as the numerator if the variant does not appear in cDNA'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'callerName'\u001b[0m: \u001b[32m'|-delimited list of mutation caller\u001b[0m\u001b[32m(\u001b[0m\u001b[32ms\u001b[0m\u001b[32m)\u001b[0m\u001b[32m that agreed on this particular call, always in \u001b[0m\n", - "\u001b[32malphabetical order: muse, mutect, somaticsniper, varscan'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'case_barcode'\u001b[0m: \u001b[32m'Original TCGA case barcode, eg TCGA-DX-A8BN'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'case_id'\u001b[0m: \u001b[32m'Unique GDC identifier for the underlying case'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'cause_of_death'\u001b[0m: \u001b[32m'Coded value indicating the circumstance or condition that results in the death of the \u001b[0m\n", - "\u001b[32msubject.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'checksum'\u001b[0m: \u001b[32m'A digit representing the sum of the correct digits in a piece of stored or transmitted digital\u001b[0m\n", - "\u001b[32mdata, against which later comparisons can be made to detect errors in the data.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'data_category'\u001b[0m: \u001b[32m'Broad categorization of the contents of the data file.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'data_modality'\u001b[0m: \u001b[32m'Data modality describes the biological nature of the information gathered as the result \u001b[0m\n", - "\u001b[32mof an Activity, independent of the technology or methods used to produce the information.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'data_type'\u001b[0m: \u001b[32m'Specific content type of the data file.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'days_to_birth'\u001b[0m: \u001b[32m\"Number of days between the date used for index and the date from a person's date of birth\u001b[0m\n", - "\u001b[32mrepresented as a calculated negative number of days.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'days_to_collection'\u001b[0m: \u001b[32m'The number of days from the index date to either the date a sample was collected for\u001b[0m\n", - "\u001b[32ma specific study or project, or the date a patient underwent a procedure \u001b[0m\u001b[32m(\u001b[0m\u001b[32me.g. surgical resection\u001b[0m\u001b[32m)\u001b[0m\u001b[32m yielding a \u001b[0m\n", - "\u001b[32msample that was eventually used for research.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'days_to_death'\u001b[0m: \u001b[32m\"Number of days between the date used for index and the date from a person's date of death\u001b[0m\n", - "\u001b[32mrepresented as a calculated number of days.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'days_to_treatment_end'\u001b[0m: \u001b[32m' The timepoint at which the treatment ended.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'days_to_treatment_start'\u001b[0m: \u001b[32m'The timepoint at which the treatment started.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dbSNP_RS'\u001b[0m: \u001b[32m'The rs-IDs from the dbSNP database, novel if not found in any database used, or null if \u001b[0m\n", - "\u001b[32mthere is no dbSNP record, but it is found in other databases'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dbSNP_Val_Status'\u001b[0m: \u001b[32m'The dbSNP validation status is reported as a semicolon-separated list of statuses. The\u001b[0m\n", - "\u001b[32munion of all rs-IDs is taken when there are multiple'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'dbgap_accession_number'\u001b[0m: \u001b[32m'The dbgap accession number for the project.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'derived_from_specimen'\u001b[0m: \u001b[32m'A source/parent specimen from which this one was directly derived.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'derived_from_subject'\u001b[0m: \u001b[32m'The Patient/ResearchSubject, or Biologically Derived Materal \u001b[0m\u001b[32m(\u001b[0m\u001b[32me.g. a cell line, \u001b[0m\n", - "\u001b[32mtissue culture, organoid\u001b[0m\u001b[32m)\u001b[0m\u001b[32m from which the specimen was directly or indirectly derived.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'diagnosis_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is \u001b[0m\n", - "\u001b[32munique within a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'diagnosis_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'diagnosis_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'drs_uri'\u001b[0m: \u001b[32m'A string of characters used to identify a resource on the Data Repo Service\u001b[0m\u001b[32m(\u001b[0m\u001b[32mDRS\u001b[0m\u001b[32m)\u001b[0m\u001b[32m.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'ethnicity'\u001b[0m: \u001b[32m\"An individual's self-described social and cultural grouping, specifically whether an \u001b[0m\n", - "\u001b[32mindividual describes themselves as Hispanic or Latino. The provided values are based on the categories defined by \u001b[0m\n", - "\u001b[32mthe U.S. Office of Management and Business and used by the U.S. Census Bureau.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'fileName'\u001b[0m: \u001b[32m'|-delimited list of name of underlying MAF file'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'fileUUID'\u001b[0m: \u001b[32m'|-delimited list of unique GDC identifiers for underlying MAF file'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'file_associated_project'\u001b[0m: \u001b[32m'A reference to the Project\u001b[0m\u001b[32m(\u001b[0m\u001b[32ms\u001b[0m\u001b[32m)\u001b[0m\u001b[32m of which this ResearchSubject is a member. The \u001b[0m\n", - "\u001b[32massociated_project may be embedded using the $ref definition or may be a reference to the id for the Project - or a\u001b[0m\n", - "\u001b[32mURI expressed as a string to an existing entity.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'file_format'\u001b[0m: \u001b[32m'Format of the data files.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'file_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique \u001b[0m\n", - "\u001b[32mwithin a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'file_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'file_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'grade'\u001b[0m: \u001b[32m'The degree of abnormality of cancer cells, a measure of differentiation, the extent to which \u001b[0m\n", - "\u001b[32mcancer cells are similar in appearance and function to healthy cells of the same tissue type. The degree of \u001b[0m\n", - "\u001b[32mdifferentiation often relates to the clinical behavior of the particular tumor. Based on the microscopic findings, \u001b[0m\n", - "\u001b[32mtumor grade is commonly described by one of four degrees of severity. Histopathologic grade of a tumor may be used \u001b[0m\n", - "\u001b[32mto plan treatment and estimate the future course, outcome, and overall prognosis of disease. Certain types of \u001b[0m\n", - "\u001b[32mcancers, such as soft tissue sarcoma, primary brain tumors, lymphomas, and breast have special grading systems.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'imaging_modality'\u001b[0m: \u001b[32m'An imaging modality describes the imaging equipment and/or method used to acquire \u001b[0m\n", - "\u001b[32mcertain structural or functional information about the body. These include but are not limited to computed \u001b[0m\n", - "\u001b[32mtomography \u001b[0m\u001b[32m(\u001b[0m\u001b[32mCT\u001b[0m\u001b[32m)\u001b[0m\u001b[32m and magnetic resonance imaging \u001b[0m\u001b[32m(\u001b[0m\u001b[32mMRI\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Taken from the DICOM standard.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'imaging_series'\u001b[0m: \u001b[32m\"The 'logical' identifier of the series or grouping of imaging files in the system of \u001b[0m\n", - "\u001b[32mrecord which the file is a part of.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'label'\u001b[0m: \u001b[32m'Short name or abbreviation for dataset. Maps to rdfs:label.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'member_of_research_project'\u001b[0m: \u001b[32m'A reference to the Study\u001b[0m\u001b[32m(\u001b[0m\u001b[32ms\u001b[0m\u001b[32m)\u001b[0m\u001b[32m of which this ResearchSubject is a member.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'method_of_diagnosis'\u001b[0m: \u001b[32m'The method used to confirm the patients malignant diagnosis'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'morphology'\u001b[0m: \u001b[32m'Code that represents the histology of the disease using the third edition of the \u001b[0m\n", - "\u001b[32mInternational Classification of Diseases for Oncology, published in 2000, used principally in tumor and cancer \u001b[0m\n", - "\u001b[32mregistri'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'n_depth'\u001b[0m: \u001b[32m'Read depth across this locus in normal BAM'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'normal_bam_uuid'\u001b[0m: \u001b[32m'Unique GDC identifier for the underlying normal bam file'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'number_of_cycles'\u001b[0m: \u001b[32m'The number of treatment cycles the subject received.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'primary_diagnosis'\u001b[0m: \u001b[32m'The diagnosis instance that qualified a subject for inclusion on a ResearchProject'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'primary_diagnosis_condition'\u001b[0m: \u001b[32m\"The text term used to describe the type of malignant disease, as \u001b[0m\n", - "\u001b[32mcategorized by the World Health Organization's \u001b[0m\u001b[32m(\u001b[0m\u001b[32mWHO\u001b[0m\u001b[32m)\u001b[0m\u001b[32m International Classification of Diseases for Oncology \u001b[0m\u001b[32m(\u001b[0m\u001b[32mICD-O\u001b[0m\u001b[32m)\u001b[0m\u001b[32m.\u001b[0m\n", - "\u001b[32mThis attribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'primary_diagnosis_site'\u001b[0m: \u001b[32m\"The text term used to describe the primary site of disease, as categorized by \u001b[0m\n", - "\u001b[32mthe World Health Organization's \u001b[0m\u001b[32m(\u001b[0m\u001b[32mWHO\u001b[0m\u001b[32m)\u001b[0m\u001b[32m International Classification of Diseases for Oncology \u001b[0m\u001b[32m(\u001b[0m\u001b[32mICD-O\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. This \u001b[0m\n", - "\u001b[32mcategorization groups cases into general categories. This attribute represents the primary site of disease that \u001b[0m\n", - "\u001b[32mqualified the subject for inclusion on the ResearchProject.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'primary_disease_type'\u001b[0m: \u001b[32m\"The text term used to describe the type of malignant disease, as categorized by \u001b[0m\n", - "\u001b[32mthe World Health Organization's \u001b[0m\u001b[32m(\u001b[0m\u001b[32mWHO\u001b[0m\u001b[32m)\u001b[0m\u001b[32m International Classification of Diseases for Oncology \u001b[0m\u001b[32m(\u001b[0m\u001b[32mICD-O\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. This \u001b[0m\n", - "\u001b[32mattribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'project_short_name'\u001b[0m: \u001b[32m'Project name abbreviation; the program name appended with a project name \u001b[0m\n", - "\u001b[32mabbreviation; eg. TCGA-OV, etc.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'race'\u001b[0m: \u001b[32m'An arbitrary classification of a taxonomic group that is a division of a species. It usually \u001b[0m\n", - "\u001b[32marises as a consequence of geographical isolation within a species and is characterized by shared heredity, \u001b[0m\n", - "\u001b[32mphysical attributes and behavior, and in the case of humans, by common history, nationality, or geographic \u001b[0m\n", - "\u001b[32mdistribution. The provided values are based on the categories defined by the U.S. Office of Management and Business\u001b[0m\n", - "\u001b[32mand used by the U.S. Census Bureau.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'researchsubject_Files'\u001b[0m: \u001b[32m'List of ids of File entities associated with the ResearchSubject'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'researchsubject_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the system of record, e.g. a UUID. This \u001b[0m\n", - "\u001b[32m'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For \u001b[0m\n", - "\u001b[32mCDA, this is case_id.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'researchsubject_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'researchsubject_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'sample_barcode_normal'\u001b[0m: \u001b[32m'TCGA sample barcode for the normal control, eg TCGA-12-1089-01A. One sample may \u001b[0m\n", - "\u001b[32mhave multiple sets of CN segmentations corresponding to multiple aliquots; use GROUP BY appropriately in queries'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'sample_barcode_tumor'\u001b[0m: \u001b[32m'TCGA sample barcode for the tumor, eg TCGA-12-1089-01A. One sample may have \u001b[0m\n", - "\u001b[32mmultiple sets of CN segmentations corresponding to multiple aliquots; use GROUP BY appropriately in queries'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'sex'\u001b[0m: \u001b[32m\"The biologic character or quality that distinguishes male and female from one another as expressed \u001b[0m\n", - "\u001b[32mby analysis of the person's gonadal, morphologic \u001b[0m\u001b[32m(\u001b[0m\u001b[32minternal and external\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, chromosomal, and hormonal \u001b[0m\n", - "\u001b[32mcharacteristics.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'source_material_type'\u001b[0m: \u001b[32m'The general kind of material from which the specimen was derived, indicating the \u001b[0m\n", - "\u001b[32mphysical nature of the source material. '\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'species'\u001b[0m: \u001b[32m'The taxonomic group \u001b[0m\u001b[32m(\u001b[0m\u001b[32me.g. species\u001b[0m\u001b[32m)\u001b[0m\u001b[32m of the patient. For MVP, since taxonomy vocabulary is \u001b[0m\n", - "\u001b[32mconsistent between GDC and PDC, using text. Ultimately, this will be a term returned by the vocabulary service.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'specimen_Files'\u001b[0m: \u001b[32m'List of ids of File entities associated with the Specimen'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'specimen_associated_project'\u001b[0m: \u001b[32m'The Project associated with the specimen.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'specimen_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is \u001b[0m\n", - "\u001b[32munique within a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'specimen_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'specimen_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'specimen_type'\u001b[0m: \u001b[32m'The high-level type of the specimen, based on its how it has been derived from the \u001b[0m\n", - "\u001b[32moriginal extracted sample. \\n'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'src_vcf_id'\u001b[0m: \u001b[32m'|-delimited list of GDC VCF file identifiers'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'stage'\u001b[0m: \u001b[32m'The extent of a cancer in the body. Staging is usually based on the size of the tumor, whether \u001b[0m\n", - "\u001b[32mlymph nodes contain cancer, and whether the cancer has spread from the original site to other parts of the body.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'subject_Files'\u001b[0m: \u001b[32m'List of ids of File entities associated with the Patient'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'subject_associated_project'\u001b[0m: \u001b[32m'The list of Projects associated with the Subject.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'subject_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is \u001b[0m\n", - "\u001b[32munique within a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'subject_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'subject_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m't_alt_count'\u001b[0m: \u001b[32m'Read depth supporting the variant allele in tumor BAM'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m't_depth'\u001b[0m: \u001b[32m'Read depth across this locus in tumor BAM'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m't_ref_count'\u001b[0m: \u001b[32m'Read depth supporting the reference allele in tumor BAM'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'therapeutic_agent'\u001b[0m: \u001b[32m'One or more therapeutic agents as part of this treatment.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_anatomic_site'\u001b[0m: \u001b[32m'The anatomical site that the treatment targets.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_effect'\u001b[0m: \u001b[32m'The effect of a treatment on the diagnosis or tumor.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_end_reason'\u001b[0m: \u001b[32m'The reason the treatment ended.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'treatment_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is \u001b[0m\n", - "\u001b[32munique within a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_outcome'\u001b[0m: \u001b[32m'The final outcome of the treatment.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_type'\u001b[0m: \u001b[32m'The treatment type including medication/therapeutics or other procedures.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'tumor_bam_uuid'\u001b[0m: \u001b[32m'Unique GDC identifier for the underlying bam file'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'vital_status'\u001b[0m: \u001b[32m'Coded value indicating the state or condition of being living or deceased; also includes \u001b[0m\n", - "\u001b[32mthe case where the vital status is unknown.'\u001b[0m\n", - " \u001b[1m}\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(columns().to_list())" - ] - }, - { - "cell_type": "markdown", - "id": "bd05eba2", - "metadata": {}, - "source": [ - "\n", - " \n", - " \n", - "There are a lot of columns in the CDA data, but Julia is most interested in diagnosis data, so she filters the list to only those:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "536970c4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'age_at_diagnosis': 'The age in days of the individual at the time of diagnosis'},\n", - " {'diagnosis_id': \"The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.\"},\n", - " {'diagnosis_identifier_system': 'The system or namespace that defines the identifier.'},\n", - " {'diagnosis_identifier_value': 'The value of the identifier, as defined by the system.'},\n", - " {'method_of_diagnosis': 'The method used to confirm the patients malignant diagnosis'},\n", - " {'primary_diagnosis': 'The diagnosis instance that qualified a subject for inclusion on a ResearchProject'},\n", - " {'primary_diagnosis_condition': \"The text term used to describe the type of malignant disease, as categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This attribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"},\n", - " {'primary_diagnosis_site': \"The text term used to describe the primary site of disease, as categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This categorization groups cases into general categories. This attribute represents the primary site of disease that qualified the subject for inclusion on the ResearchProject.\"}]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "columns().to_list(filters=\"diagnosis\")" - ] - }, - { - "cell_type": "markdown", - "id": "a63b4cf0", - "metadata": {}, - "source": [ - "
\n", - "\n", - "To search the CDA, a user also needs to know what search terms are available. Each column will contain a huge amount of data, so retreiving all of the rows would be overwhelming. Instead, the CDA has a `unique_terms()` function that will return all of the unique values that populate the requested column. Like `columns`, `unique_terms` defaults to giving us an overview of the results, and can be filtered.\n", - " \n", - "
\n", - "\n", - "\n", - " \n", - "Since Julia is interested specificially in uterine cancers, she uses the `unique_terms` function to see what data is available for 'ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site' and 'ResearchSubject.primary_diagnosis_site' to see if 'uterine' appears:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4527dde5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n",
-       "            Http Status: 400\n",
-       "            Error Message: Column ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site does not exist\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m400\u001b[0m\n", - " Error Message: Column ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site does not exist\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "AttributeError", - "evalue": "'NoneType' object has no attribute 'to_list'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/tmp/ipykernel_74/2639024179.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0munique_terms\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'to_list'" - ] - } - ], - "source": [ - "unique_terms(\"ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site\").to_list()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "740e5955", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list()" - ] - }, - { - "cell_type": "markdown", - "id": "b005036b", - "metadata": {}, - "source": [ - "
\n", - " \n", - "CDA makes multiple datasets searchable from a common interface, but does not harmonize the data. This means that researchers should review all the terms in a column, and not just choose the first one that fits, as there may be other similar terms available as well.\n", - " \n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "73e6b8dc", - "metadata": {}, - "source": [ - "\n", - " \n", - "Julia sees that \"treatment_anatomic_site\" does not have 'Uterine', but does have 'Cervix'. She also notes that both 'Uterus' and 'Uterus, NOS' are listed in the \"primary_diagnosis_site\" results. As she was initially looking for \"uterine\", Julia decides to expand her search a bit to account for variable naming schemes. So, she runs a fuzzy match filter on the \"ResearchSubject.primary_diagnosis_site\" for 'uter' as that should cover all variants:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "31064125", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(filters=\"uter\")" - ] - }, - { - "cell_type": "markdown", - "id": "9311a49e", - "metadata": {}, - "source": [ - "\n", - " \n", - "Just to be sure, Julia also searches for any other instances of \"cervix\":" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2038a8cf", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(filters=\"cerv\")" - ] - }, - { - "cell_type": "markdown", - "id": "29c4de58", - "metadata": {}, - "source": [ - "## Building a Query\n", - "\n", - "\n", - " \n", - "With all her likely terms found, Julia begins to create a search that will get data for all of her terms. She does this by writing a series of `Q` statements that define what rows should be returned from each column. For the \"treatment_anatomic_site\", only one term is of interest, so she uses the `=` operator to get only exact matches:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "951fcc8f", - "metadata": {}, - "outputs": [], - "source": [ - "Tsite = Q('ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site = \"Cervix\"')" - ] - }, - { - "cell_type": "markdown", - "id": "12cb5f72", - "metadata": {}, - "source": [ - "\n", - " \n", - "However, for \"primary_diagnosis_site\", Julia has several terms she wants to search with. Luckily, `Q` also can run fuzzy searches. It can also search more than one term at a time, so Julia writes one big `Q` statement to grab everything that is either 'uter' or 'cerv':" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36cfd8a4", - "metadata": {}, - "outputs": [], - "source": [ - "Dsite = Q('ResearchSubject.primary_diagnosis_site = \"%uter%\" OR ResearchSubject.primary_diagnosis_site = \"%cerv%\"')" - ] - }, - { - "cell_type": "markdown", - "id": "349af6f2", - "metadata": {}, - "source": [ - "\n", - " \n", - "Finally, Julia adds her two queries together into one large one:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f5f9e4f", - "metadata": {}, - "outputs": [], - "source": [ - "ALLDATA = Tsite.OR(Dsite)" - ] - }, - { - "cell_type": "markdown", - "id": "c1f5cb55", - "metadata": {}, - "source": [ - "## Looking at Summary Data\n", - "\n", - "\n", - " \n", - "Now that Julia has a query, she can use it to look for data in any of the CDA endpoints. She starts by getting an overall summary of what data is available using `count`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "355b1706", - "metadata": {}, - "outputs": [], - "source": [ - "ALLDATA.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "b7ce25fc", - "metadata": {}, - "source": [ - "\n", - " \n", - "It seems there's a lot of data that might work for Julias study! Since she is interested in the beginings of cancer, she decides to start by looking at the researchsubject information, since that is where most of the diagnosis information is. She again gets a summary using `count`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55b0cdeb", - "metadata": {}, - "outputs": [], - "source": [ - "ALLDATA.researchsubject.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "86a323e2", - "metadata": {}, - "source": [ - "## Refining Queries\n", - "\n", - "\n", - " \n", - "Browsing the primary_diagnosis_condition data, Julia notices that there are a large number of research subjects that are Adenomas and Adenocarcinomas. Since Julia wants to look for common phenotypes in early cancers, she decides it might be easier to exclude the endocrine related data, as they might have different mechanisms. So she adds a new filter to her query:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d526198", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "Noadeno = Q('ResearchSubject.primary_diagnosis_condition != \"Adenomas and Adenocarcinomas\"')\n", - "\n", - "NoAdenoData = ALLDATA.AND(Noadeno)\n", - "\n", - "NoAdenoData.researchsubject.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "40a0191d", - "metadata": {}, - "source": [ - "\n", - " \n", - "She then previews the actual metadata for researchsubject, subject, and file, to make sure that they have all the information she will need for her work. Since she's mostly interested in looking at the kinds of data available from each endpoint:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d186b837", - "metadata": {}, - "outputs": [], - "source": [ - "NoAdenoData.researchsubject.run().to_dataframe() # view the dataframe" - ] - }, - { - "cell_type": "markdown", - "id": "086697b3", - "metadata": {}, - "source": [ - "---\n", - "\n", - "
\n", - "\n", - "

ResearchSubject Field Definitions

\n", - "\n", - "A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subjects privacy. This entity plays the role of the case_id in existing data. An individual who participates in 3 studies will have 3 researchsubject IDs\n", - " \n", - "
    \n", - "
  • id: The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For CDA, this is case_id.
  • \n", - "
  • identifier: A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity.
  • \n", - "
  • identifier.system: The system or namespace that defines the identifier.
  • \n", - "
  • identifier.value: The value of the identifier, as defined by the system.
  • \n", - "
  • member_of_research_project: A reference to the Study(s) of which this ResearchSubject is a member.
  • \n", - "
  • primary_diagnosis_condition: The text term used to describe the type of malignant disease, as categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This attribute represents the disease that qualified the subject for inclusion on the ResearchProject.
  • \n", - "
  • primary_diagnosis_site: The text term used to describe the primary site of disease, as categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This categorization groups cases into general categories. This attribute represents the primary site of disease that qualified the subject for inclusion on the ResearchProject.
  • \n", - "
  • subject_id: The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system. Can be joined to the `id` field from subject results
  • \n", - "
\n", - "\n", - "
\n", - " \n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d0f5e2f", - "metadata": {}, - "outputs": [], - "source": [ - "NoAdenoData.subject.run().to_dataframe() # view the dataframe" - ] - }, - { - "cell_type": "markdown", - "id": "dec76132", - "metadata": {}, - "source": [ - "---\n", - "\n", - "
\n", - "\n", - "

Subject Field Definitions

\n", - "\n", - "A patient entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subjects privacy.\n", - "\n", - " \n", - "
    \n", - "
  • id: The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.\",STRING
  • \n", - "
  • identifier: A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity.
  • \n", - "
  • identifier.system: The system or namespace that defines the identifier.
  • \n", - "
  • identifier.value: The value of the identifier, as defined by the system.
  • \n", - "
  • species: The taxonomic group (e.g. species) of the patient. For MVP, since taxonomy vocabulary is consistent between GDC and PDC, using text. Ultimately, this will be a term returned by the vocabulary service.
  • \n", - "
  • sex: The biologic character or quality that distinguishes male and female from one another as expressed by analysis of the person's gonadal, morphologic (internal and external), chromosomal, and hormonal characteristics.
  • \n", - "
  • race: An arbitrary classification of a taxonomic group that is a division of a species. It usually arises as a consequence of geographical isolation within a species and is characterized by shared heredity, physical attributes and behavior, and in the case of humans, by common history, nationality, or geographic distribution. The provided values are based on the categories defined by the U.S. Office of Management and Business and used by the U.S. Census Bureau.
  • \n", - "
  • ethnicity: An individual's self-described social and cultural grouping, specifically whether an individual describes themselves as Hispanic or Latino. The provided values are based on the categories defined by the U.S. Office of Management and Business and used by the U.S. Census Bureau.
  • \n", - "
  • days_to_birth: Number of days between the date used for index and the date from a person's date of birth represented as a calculated negative number of days.
  • \n", - "
  • subject_associated_project: The list of Projects associated with the Subject.
  • \n", - "
  • vital_status: Coded value indicating the state or condition of being living or deceased; also includes the case where the vital status is unknown.
  • \n", - "
  • days_to_death: Number of days between the date used for index and the date from a person's date of death represented as a calculated number of days.
  • \n", - "
  • cause_of_death: Coded value indicating the circumstance or condition that results in the death of the subject.
  • \n", - "
\n", - "\n", - "
\n", - " \n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04e04136", - "metadata": {}, - "outputs": [], - "source": [ - "NoAdenoData.file.run().to_dataframe() # view the dataframe" - ] - }, - { - "cell_type": "markdown", - "id": "8cf9f2d3", - "metadata": {}, - "source": [ - "\n", - "---\n", - "\n", - "
\n", - "\n", - "

File Field Definitions

\n", - "\n", - "A file is an information-bearing electronic object that contains a physical embodiment of some information using a particular character encoding.\n", - "\n", - " \n", - "
    \n", - "
  • id: The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.
  • \n", - "
  • identifier: A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity.
  • \n", - "
  • identifier.system: The system or namespace that defines the identifier.
  • \n", - "
  • identifier.value: The value of the identifier, as defined by the system.
  • \n", - "
  • label: Short name or abbreviation for dataset. Maps to rdfs:label.
  • \n", - "
  • data_catagory: Broad categorization of the contents of the data file.
  • \n", - "
  • data_type: Specific content type of the data file.
  • \n", - "
  • file_format: Format of the data files.
  • \n", - "
  • associated_project: A reference to the Project(s) of which this ResearchSubject is a member. The associated_project may be embedded using the ref definition or may be a reference to the id for the Project - or a URI expressed as a string to an existing entity.
  • \n", - "
  • drs_uri: A string of characters used to identify a resource on the Data Repo Service(DRS). Can be used to retreive this specific file from a server.
  • \n", - "
  • byte_size: Size of the file in bytes. Maps to dcat:byteSize.
  • \n", - "
  • checksum: The md5 value for the file. A digit representing the sum of the correct digits in a piece of stored or transmitted digital data, against which later comparisons can be made to detect errors in the data.
  • \n", - "
  • data_modality: Data modality describes the biological nature of the information gathered as the result of an Activity, independent of the technology or methods used to produce the information. Always one of \"Genomic\", \"Proteomic\", or \"Imaging\".
  • \n", - "
  • imaging_modality: An imaging modality describes the imaging equipment and/or method used to acquire certain structural or functional information about the body. These include but are not limited to computed tomography (CT) and magnetic resonance imaging (MRI). Taken from the DICOM standard.
  • \n", - "
  • dbgap_accession_number: The dbgap accession number for the project.
  • \n", - "
\n", - "\n", - "
\n", - " \n", - "---\n" - ] - }, - { - "cell_type": "markdown", - "id": "ba6aadbe", - "metadata": {}, - "source": [ - "## Working with Results (pagination)\n", - "\n", - "\n", - " \n", - "Finally, Julia wants to save these results to use for the future. Since the preview dataframes only show the first 100 results of each search, she uses the `paginator` function to get all the data from the subject and researchsubject endpoints into their own dataframes:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c2cec2bc", - "metadata": {}, - "outputs": [], - "source": [ - "researchsubs = NoAdenoData.researchsubject.run()\n", - "rsdf = pd.DataFrame()\n", - "for i in researchsubs.paginator(to_df=True):\n", - " rsdf = pd.concat([rsdf, i])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a1258057", - "metadata": {}, - "outputs": [], - "source": [ - "subs = NoAdenoData.subject.run()\n", - "subsdf = pd.DataFrame()\n", - "for i in subs.paginator(to_df=True):\n", - " subsdf = pd.concat([subsdf, i])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04cd73df", - "metadata": {}, - "outputs": [], - "source": [ - "rsdf # view the researchsubject dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92a6f811", - "metadata": {}, - "outputs": [], - "source": [ - "subsdf # view the subject dataframe" - ] - }, - { - "cell_type": "markdown", - "id": "75bcbe86", - "metadata": {}, - "source": [ - "## Merging Results across Endpoints\n", - "\n", - "\n", - " \n", - "Then Julia uses the `id` fields in each result to merge them together into one big dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b7a3383", - "metadata": {}, - "outputs": [], - "source": [ - "allmetadata = pd.merge(rsdf,\n", - " subsdf,\n", - " left_on=\"subject_id\",\n", - " right_on='id')\n", - "\n", - "allmetadata" - ] - }, - { - "cell_type": "markdown", - "id": "024da831", - "metadata": {}, - "source": [ - "\n", - " \n", - "And saves it out to a csv so she can browse it with Excel:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6628de4", - "metadata": {}, - "outputs": [], - "source": [ - "allmetadata.to_csv(\"allmetadata.csv\")" - ] - }, - { - "cell_type": "markdown", - "id": "246644d3", - "metadata": {}, - "source": [ - "\n", - " \n", - "Julia knows from her subject count summary that there are more than 200,000 files associated with her subjects, which is likely far more than she needs. To help her decide what files she wants, Julia uses endpoint chaining to get summary information about the files that are assigned to researchsubjects for her search criteria:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae1ae079", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "NoAdenoData.researchsubject.file.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "111f47c0", - "metadata": {}, - "source": [ - "\n", - " \n", - "Julia decides that a good place to start would be with Slide Images. There's only 1111, so she should be able to quickly scan through them over the next few days and see if they will be useful. So she adds one more filter on her search:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4a170b3", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "JustSlides = Q('file.data_type = \"Slide Image\"')\n", - "NoadenoJustSlides = NoAdenoData.AND(JustSlides)\n", - "NoadenoJustSlides.researchsubject.file.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "0e385faf", - "metadata": {}, - "source": [ - "\n", - " \n", - "Finally, Julia uses the pagenation function again to get all the slide files, and merges her metadata file with this file information. This way she will be able to review what phenotypes each slide is associated with:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f45f6503", - "metadata": {}, - "outputs": [], - "source": [ - "slides = NoadenoJustSlides.researchsubject.file.run()\n", - "slidesdf = pd.DataFrame()\n", - "for i in slides.paginator(to_df=True):\n", - " slidesdf = pd.concat([slidesdf, i])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d1f7a21", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "slidemetadata = pd.merge(slidesdf, \n", - " allmetadata, \n", - " on=\"subject_id\")\n", - "slidemetadata" - ] - }, - { - "cell_type": "markdown", - "id": "3abb0f32", - "metadata": {}, - "source": [ - "\n", - " \n", - "She saves this file out as well." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ef6e0fb", - "metadata": {}, - "outputs": [], - "source": [ - "slidemetadata.to_csv(\"slidemetadata.csv\")" - ] - }, - { - "cell_type": "markdown", - "id": "369a3068", - "metadata": {}, - "source": [ - "\n", - " \n", - "Now Julia has all the information she needs to begin work on her project. She can use the `drs_id` column information to directly download the images she is interested in using a DRS resolver, or she can input the DRS IDs at a cloud workspace such as [Terra](https://terra.bio/) or the [Cancer Genomics Cloud](https://www.cancergenomicscloud.org/) to view the images online. In either case, she has all the metadata she needs to get started, and can save this notebook of her work in case she'd like to come back and modify her search." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/Untitled.ipynb b/notebooks/Untitled.ipynb deleted file mode 100644 index e6099756..00000000 --- a/notebooks/Untitled.ipynb +++ /dev/null @@ -1,1371 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "a93a79c9-352f-41cb-9113-6daadaef1d99", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'2022.11.3'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cdapython import Q,columns,unique_terms\n", - "Q.get_version()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "525765be-da36-4349-8ca7-546fc95c15d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
https://cancerdata.dsde-dev.broadinstitute.org/\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[4;94mhttps://cancerdata.dsde-dev.broadinstitute.org/\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
broad-dsde-dev.cda_dev\n",
-       "
\n" - ], - "text/plain": [ - "broad-dsde-dev.cda_dev\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "Q.set_default_project_dataset(\"broad-dsde-dev.cda_dev\")\n", - "Q.set_host_url(\"https://cancerdata.dsde-dev.broadinstitute.org/\")\n", - "\n", - "print(Q.get_host_url())\n", - "print(Q.get_default_project_dataset())" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "051e1ff6-e493-4a39-81ad-4459fef8a980", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 8.493 sec 8493 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m8.493\u001b[0m sec \u001b[1;36m8493\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9110309fa3c143f5951a300bf29719f4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 0\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m0\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 1000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 2000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m2000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 3000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m3000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 4000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m4000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 5000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m5000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 6000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m6000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 7000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m7000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 8000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m8000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 9000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m9000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 10000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m10000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 11000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m11000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 13000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m13000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 14000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m14000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 15000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m15000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 16000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m16000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 17000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m17000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 18000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m18000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 19000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m19000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 20000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m20000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 21000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m21000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 22000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m22000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 23000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m23000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 24000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m24000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 25000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m25000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 26000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m26000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 27000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m27000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 28000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m28000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 29000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m29000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 30000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m30000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 31000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m31000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 32000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m32000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 33000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m33000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 34000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m34000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 35000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m35000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 36000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m36000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 37000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m37000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 38000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m38000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 39000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m39000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 40000\n",
-       "            Count: 252\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: False\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m40000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m252\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;91;40mFalse\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
40252\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m40252\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "n = Q(\"sex = 'male' AND sex != 'null'\").run(limit=1000)\n", - "box = []\n", - "for i in n.paginator():\n", - " print(i, style=\"white on black\")\n", - " box.extend(i)\n", - "\n", - "print(len(box))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "75d36ce1-b772-4120-a695-68eb39f6e3dd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                    subject_id  \\\n",
-       "0                          103   \n",
-       "1                      11CO042   \n",
-       "2                      11CO057   \n",
-       "3                          121   \n",
-       "4                         2201   \n",
-       "...                        ...   \n",
-       "40247             TCGA-YL-A8S8   \n",
-       "40248             TCGA-YL-A8SL   \n",
-       "40249  UTRI_SUBJECT_001_000086   \n",
-       "40250  UTRI_SUBJECT_001_000241   \n",
-       "40251  UTRI_SUBJECT_001_000323   \n",
-       "\n",
-       "                                      subject_identifier       species   sex  \\\n",
-       "0                    [{'system': 'PDC', 'value': '103'}]  homo sapiens  male   \n",
-       "1      [{'system': 'GDC', 'value': '11CO042'}, {'syst...  homo sapiens  male   \n",
-       "2      [{'system': 'GDC', 'value': '11CO057'}, {'syst...  homo sapiens  male   \n",
-       "3                    [{'system': 'PDC', 'value': '121'}]  homo sapiens  male   \n",
-       "4                   [{'system': 'GDC', 'value': '2201'}]  homo sapiens  male   \n",
-       "...                                                  ...           ...   ...   \n",
-       "40247  [{'system': 'GDC', 'value': 'TCGA-YL-A8S8'}, {...  homo sapiens  male   \n",
-       "40248  [{'system': 'GDC', 'value': 'TCGA-YL-A8SL'}, {...  homo sapiens  male   \n",
-       "40249  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "40250  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "40251  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "\n",
-       "               race               ethnicity  days_to_birth  \\\n",
-       "0           chinese            not reported            NaN   \n",
-       "1             white  not hispanic or latino            NaN   \n",
-       "2             white  not hispanic or latino            NaN   \n",
-       "3           chinese            not reported            NaN   \n",
-       "4             white                 Unknown            NaN   \n",
-       "...             ...                     ...            ...   \n",
-       "40247         white  not hispanic or latino            NaN   \n",
-       "40248         white  not hispanic or latino            NaN   \n",
-       "40249  not reported            not reported            NaN   \n",
-       "40250  not reported            not reported            NaN   \n",
-       "40251  not reported            not reported            NaN   \n",
-       "\n",
-       "                              subject_associated_project  vital_status  \\\n",
-       "0      [Integrated Proteogenomic Characterization of ...          Dead   \n",
-       "1                                  [CPTAC-2, cptac_coad]  Not Reported   \n",
-       "2                                  [CPTAC-2, cptac_coad]  Not Reported   \n",
-       "3      [Integrated Proteogenomic Characterization of ...         Alive   \n",
-       "4                                    [BEATAML1.0-COHORT]         Alive   \n",
-       "...                                                  ...           ...   \n",
-       "40247                             [tcga_prad, TCGA-PRAD]         Alive   \n",
-       "40248                             [tcga_prad, TCGA-PRAD]         Alive   \n",
-       "40249                                         [TRIO-CRU]  Not Reported   \n",
-       "40250                                         [TRIO-CRU]  Not Reported   \n",
-       "40251                                         [TRIO-CRU]  Not Reported   \n",
-       "\n",
-       "       days_to_death cause_of_death  \n",
-       "0                NaN     Metastasis  \n",
-       "1                NaN           None  \n",
-       "2                NaN           None  \n",
-       "3                NaN           None  \n",
-       "4                NaN           None  \n",
-       "...              ...            ...  \n",
-       "40247            NaN           None  \n",
-       "40248            NaN           None  \n",
-       "40249            NaN           None  \n",
-       "40250            NaN           None  \n",
-       "40251            NaN           None  \n",
-       "\n",
-       "[40252 rows x 11 columns]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[37;44m subject_id \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m103\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m 11CO042 \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m 11CO057 \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m121\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2201\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m TCGA-YL-A8S8 \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m TCGA-YL-A8SL \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000086 \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000241 \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000323 \u001b[0m\n", - "\n", - "\u001b[37;44m subject_identifier species sex \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'PDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'103'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'11CO042'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[37;44m'syst\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'11CO057'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[37;44m'syst\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'PDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'121'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2201'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-YL-A8S8'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-YL-A8SL'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\n", - "\u001b[37;44m race ethnicity days_to_birth \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m chinese not reported NaN \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m chinese not reported NaN \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m white Unknown NaN \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\n", - "\u001b[37;44m subject_associated_project vital_status \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mIntegrated Proteogenomic Characterization of \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m Dead \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mCPTAC-\u001b[0m\u001b[1;36;44m2\u001b[0m\u001b[37;44m, cptac_coad\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mCPTAC-\u001b[0m\u001b[1;36;44m2\u001b[0m\u001b[37;44m, cptac_coad\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mIntegrated Proteogenomic Characterization of \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m Alive \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mtcga_prad, TCGA-PRAD\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mtcga_prad, TCGA-PRAD\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\n", - "\u001b[37;44m days_to_death cause_of_death \u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m NaN Metastasis \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\n", - "\u001b[1;37;44m[\u001b[0m\u001b[1;36;44m40252\u001b[0m\u001b[37;44m rows x \u001b[0m\u001b[1;36;44m11\u001b[0m\u001b[37;44m columns\u001b[0m\u001b[1;37;44m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import pandas as pd\n", - "df = pd.DataFrame(box)\n", - "print(df, style=\"white on blue\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "284ab4e3-8c39-452c-bcae-2a6d08b9b29e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/Untitled1.ipynb b/notebooks/Untitled1.ipynb deleted file mode 100644 index 37a153c8..00000000 --- a/notebooks/Untitled1.ipynb +++ /dev/null @@ -1,411 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "b974248c-48b3-4233-8211-4677aa63d377", - "metadata": {}, - "outputs": [], - "source": [ - "from cdapython import Q,unique_terms" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1764bcf2-8ecf-45c8-b617-fe239ceb7b95", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
https://cancerdata.dsde-dev.broadinstitute.org/\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[4;94mhttps://cancerdata.dsde-dev.broadinstitute.org/\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
broad-dsde-dev.cda_dev\n",
-       "
\n" - ], - "text/plain": [ - "broad-dsde-dev.cda_dev\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "Q.set_default_project_dataset(\"broad-dsde-dev.cda_dev\")\n", - "Q.set_host_url(\"https://cancerdata.dsde-dev.broadinstitute.org/\")\n", - "\n", - "print(Q.get_host_url())\n", - "print(Q.get_default_project_dataset())" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "81058c54-afe6-4592-b5e7-46c61e646ff1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Total execution time: 7374 ms 7.374 sec 0 min\n",
-       "
\n" - ], - "text/plain": [ - "Total execution time: \u001b[1;36m7374\u001b[0m ms \u001b[1;36m7.374\u001b[0m sec \u001b[1;36m0\u001b[0m min\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\n", - " Query:SELECT results.* EXCEPT(rn) FROM (SELECT ROW_NUMBER() OVER (PARTITION BY all_Files_v3_0_final.id) as rn, all_Subjects_v3_0_final.id AS id FROM broad-dsde-dev.cda_dev.all_Subjects_v3_0_final AS all_Subjects_v3_0_final INNER JOIN UNNEST(all_Subjects_v3_0_final.ResearchSubject) AS _ResearchSubject INNER JOIN UNNEST(_ResearchSubject.Specimen) AS _ResearchSubject_Specimen INNER JOIN UNNEST(_ResearchSubject_Specimen.Files) AS _ResearchSubject_Specimen_Files INNER JOIN broad-dsde-dev.cda_dev.all_Files_v3_0_final AS all_Files_v3_0_final ON all_Files_v3_0_final.id = _ResearchSubject_Specimen_Files WHERE ((IFNULL(UPPER(_ResearchSubject_Specimen.specimen_type), '') = UPPER('slide')) OR (IFNULL(UPPER(all_Files_v3_0_final.data_type), '') = UPPER('Slide Image')))) as results WHERE rn = 1\n", - " Offset: 0\n", - " Count: 100\n", - " Total Row Count: 496454\n", - " More pages: True\n", - " " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mylist = Q('ResearchSubject.Specimen.specimen_type= \"slide\" OR file.data_type = \"Slide Image\"').specimen.file.run(filter=\"id\"\n", - ", show_sql=True)\n", - "mylist" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "658895d4-6a85-4224-a610-3cdd53b450d3", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4a5327089fdc40699a88fd043a569a01", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "p = mylist.auto_paginator(to_df=True,limit=20000)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b1b65a54-92aa-4d76-ab06-d9d8dd18ce33", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 496454 entries, 0 to 99\n", - "Data columns (total 1 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 496454 non-null object\n", - "dtypes: object(1)\n", - "memory usage: 7.6+ MB\n" - ] - } - ], - "source": [ - "p.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5182c3c-1338-445e-922c-40080f784254", - "metadata": {}, - "outputs": [], - "source": [ - "localhost = \"http://localhost:8080\"\n", - "d = unique_terms(\n", - " \"species\", host=localhost, table=\"gdc-bq-sample.dev\", show_sql=True\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "20d0de23-1a78-4bdc-bd28-0af00b6b44c0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
speciesCount
0canis familiaris57
1home sapiens1
2homo sapiens138023
3homo sapiens; mus musculus41
4internal reference - pooled sample1
5jhu qc1
6mus musculus183
7normal only ir1
8not reported19
9pnnl-jhu ref1
10ref1
11taiwanese ir1
12tumor only ir1
\n", - "
" - ], - "text/plain": [ - " species Count\n", - "0 canis familiaris 57\n", - "1 home sapiens 1\n", - "2 homo sapiens 138023\n", - "3 homo sapiens; mus musculus 41\n", - "4 internal reference - pooled sample 1\n", - "5 jhu qc 1\n", - "6 mus musculus 183\n", - "7 normal only ir 1\n", - "8 not reported 19\n", - "9 pnnl-jhu ref 1\n", - "10 ref 1\n", - "11 taiwanese ir 1\n", - "12 tumor only ir 1" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81f0a38a-3cdf-4330-82bd-850cf2c094d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "d = Q('File.associated_project = \"%cptac%\"').file.run(limit=2000,async_call=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a081b6a-1f56-44a8-98de-7fc322092cbc", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.13" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/example-tester.ipynb b/notebooks/example-tester.ipynb deleted file mode 100644 index bca7d28d..00000000 --- a/notebooks/example-tester.ipynb +++ /dev/null @@ -1,1120 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "51e04dd6", - "metadata": {}, - "source": [ - "# CDA Python: Features & Examples\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "6c459428", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b113462f9487430e9d87a8b89b452359", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Dropdown(description='Tester:', options=(True, False), value=True)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from ipywidgets import Dropdown\n", - "from IPython.display import display\n", - "tester_check = Dropdown(\n", - " options=[True,False],\n", - " description='Tester:',\n", - " value=True,\n", - ")\n", - "display(tester_check)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2255e2ad", - "metadata": {}, - "source": [ - "The following examples illustrate some ```CDA Python``` features while providing insights into the underlying data structure (**Getting started**). To demonstrate those features, we provide a few relevant text queries along with step-by-step explanations on how to translate those into the ```CDA Python``` queries (**Example queries**). Finally, there are a few additional queries intended for the test users to play around with and send feedback to the CDA team (**Test queries**)." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f078fc5f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
/opt/conda/lib/python3.11/site-packages/cdapython/__init__.py\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[35m/opt/conda/lib/python3.11/site-packages/cdapython/\u001b[0m\u001b[95m__init__.py\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
2023.5.4\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m2023.5\u001b[0m.\u001b[1;36m4\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import Q, columns, unique_terms\n", - "import cdapython,pandas as pd\n", - "print(cdapython.__file__)\n", - "print(cdapython.__version__)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "ecdad705", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                            Total execution time: 0\n",
-       "                            min 8.817 sec 8817 ms\n",
-       "                            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m8.817\u001b[0m sec \u001b[1;36m8817\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "a = Q(\"sex = 'male'\").run()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "01f58251", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
subject_idsubject_identifierspeciessexraceethnicitydays_to_birthsubject_associated_projectvital_statusdays_to_deathcause_of_death
0BEATAML1.0.1039[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalenot reportednot reportedNaN[BEATAML1.0-CRENOLANIB]DeadNaNNone
1BEATAML1.0.2050[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmaleasiannot hispanic or latinoNaN[BEATAML1.0-COHORT]UnknownNaNNone
2BEATAML1.0.2116[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmaleUnknownnot hispanic or latinoNaN[BEATAML1.0-COHORT]DeadNaNNone
3BEATAML1.0.2285[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmaleUnknownUnknownNaN[BEATAML1.0-COHORT]AliveNaNNone
4BEATAML1.0.2295[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latinoNaN[BEATAML1.0-COHORT]DeadNaNNone
....................................
95FM.AD9848[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalenot reportednot reportedNaN[FM-AD]Not ReportedNaNNone
96GENIE.GENIE-DFCI-000475[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latino-21549.0[GENIE-DFCI]Not ReportedNaNNone
97GENIE.GENIE-DFCI-001317[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latino-26298.0[GENIE-DFCI]Not ReportedNaNNone
98GENIE.GENIE-DFCI-001483[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latino-22280.0[GENIE-DFCI]Not ReportedNaNNone
99GENIE.GENIE-DFCI-001844[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latino-21915.0[GENIE-DFCI]Not ReportedNaNNone
\n", - "

100 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " subject_id \\\n", - "0 BEATAML1.0.1039 \n", - "1 BEATAML1.0.2050 \n", - "2 BEATAML1.0.2116 \n", - "3 BEATAML1.0.2285 \n", - "4 BEATAML1.0.2295 \n", - ".. ... \n", - "95 FM.AD9848 \n", - "96 GENIE.GENIE-DFCI-000475 \n", - "97 GENIE.GENIE-DFCI-001317 \n", - "98 GENIE.GENIE-DFCI-001483 \n", - "99 GENIE.GENIE-DFCI-001844 \n", - "\n", - " subject_identifier species sex \\\n", - "0 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "1 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "2 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "3 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "4 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - ".. ... ... ... \n", - "95 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "96 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "97 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "98 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "99 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "\n", - " race ethnicity days_to_birth \\\n", - "0 not reported not reported NaN \n", - "1 asian not hispanic or latino NaN \n", - "2 Unknown not hispanic or latino NaN \n", - "3 Unknown Unknown NaN \n", - "4 white not hispanic or latino NaN \n", - ".. ... ... ... \n", - "95 not reported not reported NaN \n", - "96 white not hispanic or latino -21549.0 \n", - "97 white not hispanic or latino -26298.0 \n", - "98 white not hispanic or latino -22280.0 \n", - "99 white not hispanic or latino -21915.0 \n", - "\n", - " subject_associated_project vital_status days_to_death cause_of_death \n", - "0 [BEATAML1.0-CRENOLANIB] Dead NaN None \n", - "1 [BEATAML1.0-COHORT] Unknown NaN None \n", - "2 [BEATAML1.0-COHORT] Dead NaN None \n", - "3 [BEATAML1.0-COHORT] Alive NaN None \n", - "4 [BEATAML1.0-COHORT] Dead NaN None \n", - ".. ... ... ... ... \n", - "95 [FM-AD] Not Reported NaN None \n", - "96 [GENIE-DFCI] Not Reported NaN None \n", - "97 [GENIE-DFCI] Not Reported NaN None \n", - "98 [GENIE-DFCI] Not Reported NaN None \n", - "99 [GENIE-DFCI] Not Reported NaN None \n", - "\n", - "[100 rows x 11 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a.to_dataframe()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f759a7bb", - "metadata": {}, - "source": [ - "## Getting started" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "040aa164", - "metadata": {}, - "source": [ - "Print out the list of available fields with ```columns()```:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1a1b8cbd", - "metadata": {}, - "outputs": [], - "source": [ - "columns()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "11a24e81", - "metadata": {}, - "source": [ - "All of the above fields are what describes the highest entity in the data structure hierarchy – ```Patient``` entity. The first five fields represent ```Patient``` demographic information, while the ```ResearchSubject``` entity contains details that we are used to seeing within the nodes' ```Case``` record.\n", - "\n", - "One of the contributions of the CDA is aggregated ```ResearchSubject``` information. This means that all ```ResearchSubject``` records coming from the same subject are now gathered under the Patient entity. As we know, certain specimens are studied in multiple projects (being part of a single data node or multiple nodes) as different ```ResearchSubject``` entries. Those ```ResearchSubject``` entries are collected as a list under the ```ResearchSubject``` entity. One example of this is the patient record with ```id = TCGA-E2-A10A``` which contains two ```ResearchSubject``` entries, one from GDC and the other from PDC.\n", - "\n", - "Note that the ```ResearchSubject``` entity is a list of records, as many other entities above are. **There are certain considerations that should be made when creating the queries by using the fields that come from lists, but more about that will follow in examples below**.\n", - "\n", - "The names in the list may look familiar to you, but they may have been renamed or restructured in the CDA. The field name mappings are described in the _CDA Schema Field Mapping_ document that is linked in the _Testing Guide_. A more direct way to explore and understand the fields is to use the ```unique_terms()``` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f1b8ae8", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.Specimen.source_material_type\",limit=10)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "06c24c59", - "metadata": {}, - "source": [ - "Additionally, you can specify a particular data node by using the ```system``` argument:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2010208d", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.Specimen.source_material_type\", system=\"PDC\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b99dff03", - "metadata": {}, - "source": [ - "Now, let's dive into the querying!\n", - "\n", - "We can start by getting the record for ```id = TCGA-E2-A10A``` that we mentioned earlier:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33e967c1", - "metadata": {}, - "outputs": [], - "source": [ - "q = Q('id = \"TCGA-E2-A10A\"') # note the double quotes for the string value\n", - "\n", - "r = q.counts(host=\"http://35.192.60.10:8080/\")\n", - "\n", - "\n", - "print(r)\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c8cedfd", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "61e51afd", - "metadata": {}, - "source": [ - "We see that we've got a single patient record as a result, which is what we expect.\n", - "\n", - "Let's see how the result looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b57f607a", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "r[0]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7ab4a726", - "metadata": {}, - "source": [ - "The record is pretty large, so we'll print out ```identifier``` values for each ```ResearchSubject``` to confirm that we have one ```ResearchSubject``` that comes from GDC, and one that comes from PDC:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "016d24bf", - "metadata": {}, - "outputs": [], - "source": [ - "for research_subject in r[0]['ResearchSubject']:\n", - " print(research_subject['identifier'])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "5a80105b", - "metadata": {}, - "source": [ - "The values represent ```ResearchSubject``` IDs and are equivalent to ```case_id``` values in data nodes." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "438717f3", - "metadata": {}, - "source": [ - "## Example queries" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "052970b9", - "metadata": {}, - "source": [ - "Now that we can create a query with ```Q()``` function, let's see how we can combine multiple conditions.\n", - "\n", - "There are three operators available:\n", - "* ```And()```\n", - "* ```Or()```\n", - "* ```From()```\n", - "\n", - "The following examples show how those operators work in practice." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2ebb5e06", - "metadata": {}, - "source": [ - "### Query 1\n", - "\n", - "**Find data for subjects who were diagnosed after the age of 50 and who were investigated as part of the TCGA-OV project.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43d709a5", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Diagnosis.age_at_diagnosis > 50*365')\n", - "q2 = Q('ResearchSubject.associated_project = \"TCGA-OV\"')\n", - "\n", - "q = q1.AND(q2)\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c1dd55f6", - "metadata": {}, - "source": [ - "### Query 2\n", - "\n", - "**Find data for donors with melanoma (Nevi and Melanomas) diagnosis and who were diagnosed before the age of 30.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "521d0088", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "\n", - "q = q1.AND(q2)\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d126823c", - "metadata": {}, - "source": [ - "In addition, we can check how many records come from particular systems by adding one more condition to the query:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25d9258a", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "q3 = Q('ResearchSubject.Specimen.identifier.system = \"GDC\"')\n", - "\n", - "q = q1.AND(q2.AND(q3))\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c06e9a8d", - "metadata": {}, - "source": [ - "By comparing the ```Count``` value of the two results we can see that all the patients returned in the initial query are coming from the GDC.\n", - "\n", - "To explore the results further, we can fetch the patient JSON objects by iterating through the results:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4cb20150", - "metadata": {}, - "outputs": [], - "source": [ - "projects = set()\n", - "\n", - "for patient in r:\n", - " research_subjects = patient['ResearchSubject']\n", - " for rs in research_subjects:\n", - " projects.add(rs['associated_project'])\n", - "\n", - "print(projects)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "6d8f8cc3", - "metadata": {}, - "source": [ - "The output shows the projects where _Nevi and Melanomas_ cases appear." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "12c650a3", - "metadata": {}, - "source": [ - "### Query 3\n", - "\n", - "**Identify all samples that meet the following conditions:**\n", - "\n", - "* **Sample is from primary tumor**\n", - "* **Disease is ovarian or breast cancer**\n", - "* **Subjects are females under the age of 60 years**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95920a42", - "metadata": {}, - "outputs": [], - "source": [ - "tumor_type = Q('ResearchSubject.Specimen.source_material_type = \"Primary Tumor\"')\n", - "disease1 = Q('ResearchSubject.primary_disease_site = \"Ovary\"')\n", - "disease2 = Q('ResearchSubject.primary_disease_site = \"Breast\"')\n", - "demographics1 = Q('sex = \"female\"')\n", - "demographics2 = Q('days_to_birth > -60*365') # note that days_to_birth is a negative value\n", - "\n", - "q1 = tumor_type.AND(demographics1.AND(demographics2))\n", - "q2 = disease1.OR(disease2)\n", - "q = q1.AND(q2)\n", - "\n", - "r = q.run()\n", - "print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "713ac984", - "metadata": {}, - "source": [ - "In this case, we have a result that contains more than 1000 records which is the default page size. To load the next 1000 records, we can use the ```next_page()``` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "efdadbb9", - "metadata": {}, - "outputs": [], - "source": [ - "r2 = r.next_page()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1419fc7", - "metadata": {}, - "outputs": [], - "source": [ - "print(r2)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a960e07d", - "metadata": {}, - "source": [ - "Alternatively, we can use the ```offset``` argument to specify the record to start from:\n", - "\n", - "```\n", - "...\n", - "r = q.run(offset=1000)\n", - "print(r)\n", - "```" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "41db78e2", - "metadata": {}, - "source": [ - "### Query 4\n", - "\n", - "**Find data for donors with \"Ovarian Serous Cystadenocarcinoma\" with proteomic and genomic data.**" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b9171d93", - "metadata": {}, - "source": [ - "**Note that disease type value denoting the same disease groups can be completely different within different systems. This is where CDA features come into play.** We first start by exploring the values available for this particular field in both systems." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33d8294f", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms('ResearchSubject.primary_disease_type', system=\"GDC\",limit=10)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7495d1e4", - "metadata": {}, - "source": [ - "Since “Ovarian Serous Cystadenocarcinoma” doesn’t appear in GDC values we decide to look into the PDC:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aac21adf", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms('ResearchSubject.primary_disease_type', system=\"PDC\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a2ec177f", - "metadata": {}, - "source": [ - "After examining the output, we see that it does come from the PDC. Hence, if we could first identify the data that has research subjects found within the PDC that have this particular disease type, and then further narrow down the results to include only the portion of the data that is present in GDC, we could get the records that we are looking for." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "19a05c04", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 3.927 sec 3927 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m3.927\u001b[0m sec \u001b[1;36m3927\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\n", - " Query:SELECT results.* EXCEPT(rn) FROM (SELECT ROW_NUMBER() OVER (PARTITION BY Subject.id) as rn, Subject.id AS subject_id, Subject.identifier AS subject_identifier, Subject.species AS species, Subject.sex AS sex, Subject.race AS race, Subject.ethnicity AS ethnicity, Subject.days_to_birth AS days_to_birth, Subject.subject_associated_project AS subject_associated_project, Subject.vital_status AS vital_status, Subject.days_to_death AS days_to_death, Subject.cause_of_death AS cause_of_death FROM gdc-bq-sample.dev.all_Subjects_v3_0_final AS Subject LEFT JOIN UNNEST(Subject.identifier) AS _subject_identifier WHERE (IFNULL(UPPER(_subject_identifier.system), '') = UPPER('GDC'))) as results WHERE rn = 1\n", - " Offset: 0\n", - " Count: 100\n", - " Total Row Count: 85464\n", - " More pages: True\n", - " " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# q1 = Q('ResearchSubject.primary_disease_type = \"Ovarian Serous Cystadenocarcinoma\"')\n", - "# q2 = Q('ResearchSubject.identifier.system = \"PDC\"')\n", - "# q3 = Q('ResearchSubject.identifier.system = \"GDC\"')\n", - "\n", - "# q = q3.FROM(q1.AND(q2))\n", - "# r = q.run()\n", - "\n", - "# print(r)\n", - "\n", - "\n", - "from cdapython import Q\n", - "\n", - "\n", - "integration_host = \"http://35.192.60.10:8080/\"\n", - "integration_table = \"gdc-bq-sample.dev\"\n", - "Q('subject_identifier_system = \"GDC\" FROM subject_identifier_system = \"PDC\" FROM subject_identifier_system = \"IDC\"').run(host=integration_host,table=integration_table,show_sql=True)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b1d3d6a0", - "metadata": {}, - "source": [ - "As you can see, this is achieved by utilizing ```From``` operator. The ```From``` operator allows us to create queries from results of other queries. This is particularly useful when working with conditions that involve a single field which can take multiple different values for different items in a list that is being part of, e.g. we need ```ResearchSubject.identifier.system``` to be both “PDC” and “GDC” for a single patient. In such cases, ```And``` operator can’t help because it will return those entries where the field takes both values, which is zero entries." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5ec883f", - "metadata": {}, - "outputs": [], - "source": [ - "for i in Q.sql(\"SELECT * FROM `gdc-bq-sample.cda_mvp.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` WHERE table_name = 'v3' Limit 5\"):\n", - " print(i)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc992d2a", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.identifier.system = \"GDC\" FROM ResearchSubject.primary_disease_type = \"Ovarian Serous Cystadenocarcinoma\" AND ResearchSubject.identifier.system = \"PDC\"')\n", - "result = q1.run(async_call=True)\n", - "print(result)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "89b96ee0", - "metadata": {}, - "source": [ - "## Test queries" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3cdcf002", - "metadata": {}, - "source": [ - "Now that we've successfully run and analyzed a few queries, here are a few additional ones you can try out on your own.\n", - "\n", - "Solutions can be shared with the CDA team as indicated in the _Testing Guide_ document." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f5fee345", - "metadata": {}, - "source": [ - "### Test Query 1\n", - "\n", - "**Find data from TCGA-BRCA project, with donors over the age of 50 with Stage IIIC cancer.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92ccb890", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4fc36929", - "metadata": {}, - "source": [ - "### Test Query 2\n", - "\n", - "**Find data from all patients who have been treated with \"Radiation Therapy, NOS\" and have both genomic and proteomic data.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a987983", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "e11f76f7", - "metadata": {}, - "source": [ - "### Test Query 3\n", - "\n", - "**Find data from all subjects with lung adenocarcinomas that have both primary and recurrent tumors.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2cbcd5e", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a9045266-ecda-4692-b732-d4b8c660801a", - "metadata": {}, - "outputs": [], - "source": [ - "q = Q('id = \"TCGA-13-1409\"')\n", - "r = q.run(limit = 20, host=\"http://localhost:8080\")\n", - "# r.to_dataframe(\n", - "# record_path =['ResearchSubject','File'],\n", - "# meta=['id','species','sex','race','ethnicity'],\n", - "# meta_prefix= 'subject_', \n", - "# max_level = 0,\n", - "# errors='ignore'\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8a346016-6ab6-4e02-94ad-22bd8163bbb9", - "metadata": {}, - "outputs": [], - "source": [ - "r.to_dataframe(record_path=['ResearchSubject'],meta=['id','species','sex','race','ethnicity'],meta_prefix= 'subject_',)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5475f4cd-6ce2-4ce7-b3f7-a292e31c6e76", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "q = q1.AND(q2)\n", - "print(q)\n", - "r = q.run(host=\"http://localhost:8080\")\n", - "print(r)" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "metadata": { - "interpreter": { - "hash": "5c867d1980d5b66d2bfc8a5903dcee074b4b68f3917a4f27f8a310cab24e9f1b" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb deleted file mode 100644 index 86fe5588..00000000 --- a/notebooks/example.ipynb +++ /dev/null @@ -1,825 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "8ee54fa1", - "metadata": {}, - "source": [ - "# CDA Python: Features & Examples\n", - "---" - ] - }, - { - "cell_type": "markdown", - "id": "35217356", - "metadata": {}, - "source": [ - "The following examples illustrate some ```CDA Python``` features while providing insights into the underlying data structure (**Getting started**). To demonstrate those features, we provide a few relevant text queries along with step-by-step explanations on how to translate those into the ```CDA Python``` queries (**Example queries**). Finally, there are a few additional queries intended for the test users to play around with and send feedback to the CDA team (**Test queries**)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5245b7b0", - "metadata": {}, - "outputs": [], - "source": [ - "from cdapython import Q, columns, unique_terms,query\n", - "import cdapython\n", - "print(cdapython.__file__)\n", - "print(cdapython.__version__)" - ] - }, - { - "cell_type": "markdown", - "id": "bccff533", - "metadata": {}, - "source": [ - "## Getting started" - ] - }, - { - "cell_type": "markdown", - "id": "36d1d3ec", - "metadata": {}, - "source": [ - "Print out the list of available fields with ```columns()```:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b83e40e6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dboles/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/urllib3/connectionpool.py:1020: InsecureRequestWarning: Unverified HTTPS request is being made to host 'cda.cda-dev.broadinstitute.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n", - " InsecureRequestWarning,\n" - ] - }, - { - "data": { - "text/plain": [ - "['id',\n", - " 'identifier',\n", - " 'identifier.system',\n", - " 'identifier.value',\n", - " 'sex',\n", - " 'race',\n", - " 'ethnicity',\n", - " 'days_to_birth',\n", - " 'subject_associated_project',\n", - " 'File',\n", - " 'File.id',\n", - " 'File.identifier',\n", - " 'File.identifier.system',\n", - " 'File.identifier.value',\n", - " 'File.label',\n", - " 'File.data_category',\n", - " 'File.data_type',\n", - " 'File.file_format',\n", - " 'File.associated_project',\n", - " 'File.drs_uri',\n", - " 'File.byte_size',\n", - " 'File.checksum',\n", - " 'ResearchSubject',\n", - " 'ResearchSubject.id',\n", - " 'ResearchSubject.identifier',\n", - " 'ResearchSubject.identifier.system',\n", - " 'ResearchSubject.identifier.value',\n", - " 'ResearchSubject.associated_project',\n", - " 'ResearchSubject.primary_disease_type',\n", - " 'ResearchSubject.primary_disease_site',\n", - " 'ResearchSubject.Diagnosis',\n", - " 'ResearchSubject.Diagnosis.id',\n", - " 'ResearchSubject.Diagnosis.identifier',\n", - " 'ResearchSubject.Diagnosis.identifier.system',\n", - " 'ResearchSubject.Diagnosis.identifier.value',\n", - " 'ResearchSubject.Diagnosis.primary_diagnosis',\n", - " 'ResearchSubject.Diagnosis.age_at_diagnosis',\n", - " 'ResearchSubject.Diagnosis.morphology',\n", - " 'ResearchSubject.Diagnosis.stage',\n", - " 'ResearchSubject.Diagnosis.grade',\n", - " 'ResearchSubject.Diagnosis.Treatment',\n", - " 'ResearchSubject.Diagnosis.Treatment.id',\n", - " 'ResearchSubject.Diagnosis.Treatment.identifier',\n", - " 'ResearchSubject.Diagnosis.Treatment.identifier.system',\n", - " 'ResearchSubject.Diagnosis.Treatment.identifier.value',\n", - " 'ResearchSubject.Diagnosis.Treatment.treatment_type',\n", - " 'ResearchSubject.Diagnosis.Treatment.treatment_outcome',\n", - " 'ResearchSubject.Diagnosis.Treatment.days_to_treatment_start',\n", - " 'ResearchSubject.Diagnosis.Treatment.days_treatment_end',\n", - " 'ResearchSubject.File',\n", - " 'ResearchSubject.File.id',\n", - " 'ResearchSubject.File.identifier',\n", - " 'ResearchSubject.File.identifier.system',\n", - " 'ResearchSubject.File.identifier.value',\n", - " 'ResearchSubject.File.label',\n", - " 'ResearchSubject.File.data_category',\n", - " 'ResearchSubject.File.data_type',\n", - " 'ResearchSubject.File.file_format',\n", - " 'ResearchSubject.File.associated_project',\n", - " 'ResearchSubject.File.drs_uri',\n", - " 'ResearchSubject.File.byte_size',\n", - " 'ResearchSubject.File.checksum',\n", - " 'ResearchSubject.Specimen',\n", - " 'ResearchSubject.Specimen.id',\n", - " 'ResearchSubject.Specimen.identifier',\n", - " 'ResearchSubject.Specimen.identifier.system',\n", - " 'ResearchSubject.Specimen.identifier.value',\n", - " 'ResearchSubject.Specimen.associated_project',\n", - " 'ResearchSubject.Specimen.age_at_collection',\n", - " 'ResearchSubject.Specimen.primary_disease_type',\n", - " 'ResearchSubject.Specimen.anatomical_site',\n", - " 'ResearchSubject.Specimen.source_material_type',\n", - " 'ResearchSubject.Specimen.specimen_type',\n", - " 'ResearchSubject.Specimen.derived_from_specimen',\n", - " 'ResearchSubject.Specimen.derived_from_subject',\n", - " 'ResearchSubject.Specimen.File',\n", - " 'ResearchSubject.Specimen.File.id',\n", - " 'ResearchSubject.Specimen.File.identifier',\n", - " 'ResearchSubject.Specimen.File.identifier.system',\n", - " 'ResearchSubject.Specimen.File.identifier.value',\n", - " 'ResearchSubject.Specimen.File.label',\n", - " 'ResearchSubject.Specimen.File.data_category',\n", - " 'ResearchSubject.Specimen.File.data_type',\n", - " 'ResearchSubject.Specimen.File.file_format',\n", - " 'ResearchSubject.Specimen.File.associated_project',\n", - " 'ResearchSubject.Specimen.File.drs_uri',\n", - " 'ResearchSubject.Specimen.File.byte_size',\n", - " 'ResearchSubject.Specimen.File.checksum']" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "columns(verify=False)" - ] - }, - { - "cell_type": "markdown", - "id": "26229d50", - "metadata": {}, - "source": [ - "All of the above fields are what describes the highest entity in the data structure hierarchy – ```Patient``` entity. The first five fields represent ```Patient``` demographic information, while the ```ResearchSubject``` entity contains details that we are used to seeing within the nodes' ```Case``` record.\n", - "\n", - "One of the contributions of the CDA is aggregated ```ResearchSubject``` information. This means that all ```ResearchSubject``` records coming from the same subject are now gathered under the Patient entity. As we know, certain specimens are studied in multiple projects (being part of a single data node or multiple nodes) as different ```ResearchSubject``` entries. Those ```ResearchSubject``` entries are collected as a list under the ```ResearchSubject``` entity. One example of this is the patient record with ```id = TCGA-E2-A10A``` which contains two ```ResearchSubject``` entries, one from GDC and the other from PDC.\n", - "\n", - "Note that the ```ResearchSubject``` entity is a list of records, as many other entities above are. **There are certain considerations that should be made when creating the queries by using the fields that come from lists, but more about that will follow in examples below**.\n", - "\n", - "The names in the list may look familiar to you, but they may have been renamed or restructured in the CDA. The field name mappings are described in the _CDA Schema Field Mapping_ document that is linked in the _Testing Guide_. A more direct way to explore and understand the fields is to use the ```unique_terms()``` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47da0214", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.Specimen.source_material_type\",limit=10)" - ] - }, - { - "cell_type": "markdown", - "id": "e96d2c77", - "metadata": {}, - "source": [ - "Additionally, you can specify a particular data node by using the ```system``` argument:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f922031e", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.Specimen.source_material_type\", system=\"PDC\")" - ] - }, - { - "cell_type": "markdown", - "id": "1f6396f2", - "metadata": {}, - "source": [ - "Now, let's dive into the querying!\n", - "\n", - "We can start by getting the record for ```id = TCGA-E2-A10A``` that we mentioned earlier:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ac11f401", - "metadata": {}, - "outputs": [], - "source": [ - "q = Q('id = \"TCGA-E2-A10A\"') # note the double quotes for the string value\n", - "\n", - "r = q.run()\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3936e82a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "b83e303d", - "metadata": {}, - "source": [ - "We see that we've got a single patient record as a result, which is what we expect.\n", - "\n", - "Let's see how the result looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1aa11a6f", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "r[0]" - ] - }, - { - "cell_type": "markdown", - "id": "e97085f8", - "metadata": {}, - "source": [ - "The record is pretty large, so we'll print out ```identifier``` values for each ```ResearchSubject``` to confirm that we have one ```ResearchSubject``` that comes from GDC, and one that comes from PDC:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88625cfa", - "metadata": {}, - "outputs": [], - "source": [ - "for research_subject in r[0]['ResearchSubject']:\n", - " print(research_subject['identifier'])" - ] - }, - { - "cell_type": "markdown", - "id": "8b48f8d2", - "metadata": {}, - "source": [ - "The values represent ```ResearchSubject``` IDs and are equivalent to ```case_id``` values in data nodes." - ] - }, - { - "cell_type": "markdown", - "id": "3a783bd7", - "metadata": {}, - "source": [ - "## Example queries" - ] - }, - { - "cell_type": "markdown", - "id": "c215820a", - "metadata": {}, - "source": [ - "Now that we can create a query with ```Q()``` function, let's see how we can combine multiple conditions.\n", - "\n", - "There are three operators available:\n", - "* ```And()```\n", - "* ```Or()```\n", - "* ```From()```\n", - "\n", - "The following examples show how those operators work in practice." - ] - }, - { - "cell_type": "markdown", - "id": "77acef39", - "metadata": {}, - "source": [ - "### Query 1\n", - "\n", - "**Find data for subjects who were diagnosed after the age of 50 and who were investigated as part of the TCGA-OV project.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6da17a6", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Diagnosis.age_at_diagnosis > 50*365')\n", - "q2 = Q('ResearchSubject.associated_project = \"TCGA-OV\"')\n", - "\n", - "q = q1.AND(q2)\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "3d13b766", - "metadata": {}, - "source": [ - "### Query 2\n", - "\n", - "**Find data for donors with melanoma (Nevi and Melanomas) diagnosis and who were diagnosed before the age of 30.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6ce3248", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "\n", - "q = q1.AND(q2)\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "7621e34d", - "metadata": {}, - "source": [ - "In addition, we can check how many records come from particular systems by adding one more condition to the query:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "45ad21fd", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "q3 = Q('ResearchSubject.Specimen.identifier.system = \"GDC\"')\n", - "\n", - "q = q1.AND(q2.AND(q3))\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "036bc88d", - "metadata": {}, - "source": [ - "By comparing the ```Count``` value of the two results we can see that all the patients returned in the initial query are coming from the GDC.\n", - "\n", - "To explore the results further, we can fetch the patient JSON objects by iterating through the results:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "df5deb25", - "metadata": {}, - "outputs": [], - "source": [ - "projects = set()\n", - "\n", - "for patient in r:\n", - " research_subjects = patient['ResearchSubject']\n", - " for rs in research_subjects:\n", - " projects.add(rs['associated_project'])\n", - "\n", - "print(projects)" - ] - }, - { - "cell_type": "markdown", - "id": "27df4ffc", - "metadata": {}, - "source": [ - "The output shows the projects where _Nevi and Melanomas_ cases appear." - ] - }, - { - "cell_type": "markdown", - "id": "ee0955c4", - "metadata": {}, - "source": [ - "### Query 3\n", - "\n", - "**Identify all samples that meet the following conditions:**\n", - "\n", - "* **Sample is from primary tumor**\n", - "* **Disease is ovarian or breast cancer**\n", - "* **Subjects are females under the age of 60 years**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e6709c55", - "metadata": {}, - "outputs": [], - "source": [ - "tumor_type = Q('ResearchSubject.Specimen.source_material_type = \"Primary Tumor\"')\n", - "disease1 = Q('ResearchSubject.primary_disease_site = \"Ovary\"')\n", - "disease2 = Q('ResearchSubject.primary_disease_site = \"Breast\"')\n", - "demographics1 = Q('sex = \"female\"')\n", - "demographics2 = Q('days_to_birth > -60*365') # note that days_to_birth is a negative value\n", - "\n", - "q1 = tumor_type.AND(demographics1.AND(demographics2))\n", - "q2 = disease1.OR(disease2)\n", - "q = q1.AND(q2)\n", - "\n", - "r = q.run()\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "22e0e66a", - "metadata": {}, - "source": [ - "In this case, we have a result that contains more than 1000 records which is the default page size. To load the next 1000 records, we can use the ```next_page()``` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c62c6074", - "metadata": {}, - "outputs": [], - "source": [ - "r2 = r.next_page()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0376b0d6", - "metadata": {}, - "outputs": [], - "source": [ - "print(r2)" - ] - }, - { - "cell_type": "markdown", - "id": "72b1f71b", - "metadata": {}, - "source": [ - "Alternatively, we can use the ```offset``` argument to specify the record to start from:\n", - "\n", - "```\n", - "...\n", - "r = q.run(offset=1000)\n", - "print(r)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "bd653b13", - "metadata": {}, - "source": [ - "### Query 4\n", - "\n", - "**Find data for donors with \"Ovarian Serous Cystadenocarcinoma\" with proteomic and genomic data.**" - ] - }, - { - "cell_type": "markdown", - "id": "92e0f930", - "metadata": {}, - "source": [ - "**Note that disease type value denoting the same disease groups can be completely different within different systems. This is where CDA features come into play.** We first start by exploring the values available for this particular field in both systems." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "cd201350", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dboles/Documents/python/working/cda-python/cdapython/decorators_cache.py:18: ResourceWarning: unclosed \n", - " func.cache_clear()\n", - "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n" - ] - }, - { - "data": { - "text/plain": [ - "[None,\n", - " 'Acinar Cell Neoplasms',\n", - " 'Adenomas and Adenocarcinomas',\n", - " 'Adnexal and Skin Appendage Neoplasms',\n", - " 'Basal Cell Neoplasms',\n", - " 'Blood Vessel Tumors',\n", - " 'Chronic Myeloproliferative Disorders',\n", - " 'Complex Epithelial Neoplasms',\n", - " 'Complex Mixed and Stromal Neoplasms',\n", - " 'Cystic, Mucinous and Serous Neoplasms']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "unique_terms('ResearchSubject.primary_disease_type', system=\"GDC\",limit=10,verify=False)" - ] - }, - { - "cell_type": "markdown", - "id": "b3eecdd5", - "metadata": {}, - "source": [ - "Since “Ovarian Serous Cystadenocarcinoma” doesn’t appear in GDC values we decide to look into the PDC:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "706f0301", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms('ResearchSubject.primary_disease_type', system=\"PDC\")" - ] - }, - { - "cell_type": "markdown", - "id": "3a3476ed", - "metadata": {}, - "source": [ - "After examining the output, we see that it does come from the PDC. Hence, if we could first identify the data that has research subjects found within the PDC that have this particular disease type, and then further narrow down the results to include only the portion of the data that is present in GDC, we could get the records that we are looking for." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "637640e2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Getting results from database\n", - "\n", - "Total execution time: 27307 ms\n", - "\n", - " QueryID: bd084bbd-33bd-4339-b034-b620192922b1\n", - " Query: SELECT all_v2.* FROM (SELECT all_v2.* FROM gdc-bq-sample.integration.all_v2 AS all_v2, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE ((_ResearchSubject.primary_disease_type = 'Ovarian Serous Cystadenocarcinoma') AND (_identifier.system = 'PDC'))) AS all_v2, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE (_identifier.system = 'GDC')\n", - " Offset: 0\n", - " Count: 100\n", - " Total Row Count: 275\n", - " More pages: True\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dboles/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/ipykernel_launcher.py:6: ResourceWarning: unclosed \n", - " \n", - "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n" - ] - } - ], - "source": [ - "q1 = Q('ResearchSubject.primary_disease_type = \"Ovarian Serous Cystadenocarcinoma\"')\n", - "q2 = Q('ResearchSubject.identifier.system = \"PDC\"')\n", - "q3 = Q('ResearchSubject.identifier.system = \"GDC\"')\n", - "\n", - "q = q3.FROM(q1.AND(q2))\n", - "r = q.run(verify=False)\n", - "\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "9ea451f4", - "metadata": {}, - "source": [ - "As you can see, this is achieved by utilizing ```From``` operator. The ```From``` operator allows us to create queries from results of other queries. This is particularly useful when working with conditions that involve a single field which can take multiple different values for different items in a list that is being part of, e.g. we need ```ResearchSubject.identifier.system``` to be both “PDC” and “GDC” for a single patient. In such cases, ```And``` operator can’t help because it will return those entries where the field takes both values, which is zero entries." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "da890a83", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'days_to_birth', 'field_path': 'days_to_birth', 'data_type': 'INT64', 'description': None}\n", - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'race', 'field_path': 'race', 'data_type': 'STRING', 'description': None}\n", - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'sex', 'field_path': 'sex', 'data_type': 'STRING', 'description': None}\n", - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'ethnicity', 'field_path': 'ethnicity', 'data_type': 'STRING', 'description': None}\n", - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'id', 'field_path': 'id', 'data_type': 'STRING', 'description': None}\n" - ] - } - ], - "source": [ - "for i in Q.sql(\"SELECT * FROM `gdc-bq-sample.cda_mvp.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` WHERE table_name = 'v3' Limit 5\",verify=False):\n", - " print(i)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "231e5519", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = query('ResearchSubject.identifier.system = \"GDC\" FROM ResearchSubject.primary_disease_type = \"Ovarian Serous Cystadenocarcinoma\" AND ResearchSubject.identifier.system = \"PDC\"')\n", - "result = q1.run(async_call=True)\n", - "print(result)\n" - ] - }, - { - "cell_type": "markdown", - "id": "716356cc", - "metadata": {}, - "source": [ - "## Data extraction and release information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d420653", - "metadata": {}, - "outputs": [], - "source": [ - "# If you are interested in the extraction dates or data release versions of GDC, PDC, or IDC that is in a table or view, execute this code\n", - "\n", - "for i in Q.sql(\"SELECT option_value FROM `gdc-bq-sample.integration.INFORMATION_SCHEMA.TABLE_OPTIONS` WHERE table_name = 'all_v1'\"):\n", - " print(i)" - ] - }, - { - "cell_type": "markdown", - "id": "e1a235eb", - "metadata": {}, - "source": [ - "## Test queries" - ] - }, - { - "cell_type": "markdown", - "id": "9740d86e", - "metadata": {}, - "source": [ - "Now that we've successfully run and analyzed a few queries, here are a few additional ones you can try out on your own.\n", - "\n", - "Solutions can be shared with the CDA team as indicated in the _Testing Guide_ document." - ] - }, - { - "cell_type": "markdown", - "id": "484df50b", - "metadata": {}, - "source": [ - "### Test Query 1\n", - "\n", - "**Find data from TCGA-BRCA project, with donors over the age of 50 with Stage IIIC cancer.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "76e5ff7c", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "dfebb4f8", - "metadata": {}, - "source": [ - "### Test Query 2\n", - "\n", - "**Find data from all patients who have been treated with \"Radiation Therapy, NOS\" and have both genomic and proteomic data.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8fab696d", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "cf6e1ba8", - "metadata": {}, - "source": [ - "### Test Query 3\n", - "\n", - "**Find data from all subjects with lung adenocarcinomas that have both primary and recurrent tumors.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13b82aef", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a84d6fcc", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "Q('ResearchSubject.id = \"c5421e34-e5c7-4ba5-aed9-146a5575fd8d\"').run().pretty_print(-1) " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10.4 ('venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - }, - "metadata": { - "interpreter": { - "hash": "5c867d1980d5b66d2bfc8a5903dcee074b4b68f3917a4f27f8a310cab24e9f1b" - } - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/testa.ipynb b/notebooks/testa.ipynb deleted file mode 100644 index cca9e32e..00000000 --- a/notebooks/testa.ipynb +++ /dev/null @@ -1,131 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
totalfilessystemsexraceethnicitycause_of_death
0{0: 65}{0: 45342}{0: [{'system': 'GDC', 'count': 65}, {'system'...{0: [{'sex': 'male', 'count': 47}, {'sex': 'fe...{0: [{'race': 'white', 'count': 38}, {'race': ...{0: [{'ethnicity': 'not hispanic or latino', '...{0: [{'cause_of_death': 'Not Reported', 'count...
\n", - "
" - ], - "text/plain": [ - " total files system \\\n", - "0 {0: 65} {0: 45342} {0: [{'system': 'GDC', 'count': 65}, {'system'... \n", - "\n", - " sex \\\n", - "0 {0: [{'sex': 'male', 'count': 47}, {'sex': 'fe... \n", - "\n", - " race \\\n", - "0 {0: [{'race': 'white', 'count': 38}, {'race': ... \n", - "\n", - " ethnicity \\\n", - "0 {0: [{'ethnicity': 'not hispanic or latino', '... \n", - "\n", - " cause_of_death \n", - "0 {0: [{'cause_of_death': 'Not Reported', 'count... " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd \n", - "d = pd.DataFrame(\n", - " [\n", - " {'total': {0: 65},\n", - " 'files': {0: 45342},\n", - " 'system': {0: [{'system': 'GDC', 'count': 65},\n", - " {'system': 'IDC', 'count': 65},\n", - " {'system': 'PDC', 'count': 65}]},\n", - " 'sex': {0: [{'sex': 'male', 'count': 47}, {'sex': 'female', 'count': 18}]},\n", - " 'race': {0: [{'race': 'white', 'count': 38},\n", - " {'race': 'not reported', 'count': 26},\n", - " {'race': 'asian', 'count': 1}]},\n", - " 'ethnicity': {0: [{'ethnicity': 'not hispanic or latino', 'count': 26},\n", - " {'ethnicity': 'not reported', 'count': 35},\n", - " {'ethnicity': 'hispanic or latino', 'count': 4}]},\n", - " 'cause_of_death': {0: [{'cause_of_death': 'Not Reported', 'count': 63},\n", - " {'cause_of_death': 'Cancer Related', 'count': 2}]}}\n", - " \n", - " \n", - " \n", - " ]\n", - "\n", - ")\n", - "\n", - "\n", - "d\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/testing_sql.ipynb b/notebooks/testing_sql.ipynb deleted file mode 100644 index 06c837e9..00000000 --- a/notebooks/testing_sql.ipynb +++ /dev/null @@ -1,125 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "33d30b80-163c-4b18-a745-6faecdb2d98c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2022.6.28\n", - "everything is fine\n" - ] - } - ], - "source": [ - "from matplotlib import pyplot\n", - "from cdapython import Q \n", - "print(Q.get_version())\n", - "print(Q.bigquery_status())\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "013b417f-674f-47de-a235-66d84c9f4897", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total execution time: 3628 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "{'total': {0: 65},\n", - " 'files': {0: 45342},\n", - " 'system': {0: [{'system': 'GDC', 'count': 65},\n", - " {'system': 'IDC', 'count': 65},\n", - " {'system': 'PDC', 'count': 65}]},\n", - " 'sex': {0: [{'sex': 'male', 'count': 47}, {'sex': 'female', 'count': 18}]},\n", - " 'race': {0: [{'race': 'white', 'count': 38},\n", - " {'race': 'not reported', 'count': 26},\n", - " {'race': 'asian', 'count': 1}]},\n", - " 'ethnicity': {0: [{'ethnicity': 'not hispanic or latino', 'count': 26},\n", - " {'ethnicity': 'not reported', 'count': 35},\n", - " {'ethnicity': 'hispanic or latino', 'count': 4}]},\n", - " 'cause_of_death': {0: [{'cause_of_death': 'Not Reported', 'count': 63},\n", - " {'cause_of_death': 'Cancer Related', 'count': 2}]}}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pandas import json_normalize\n", - "q1 = Q('ResearchSubject.Diagnosis.stage = \"Stage I\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.stage = \"Stage II\"')\n", - "q3 = Q(\"ResearchSubject.primary_diagnosis_site = 'Kidney'\")\n", - "q_diag = q1.OR(q2)\n", - "q = q_diag.AND(q3)\n", - "# print(q.counts.run())\n", - "qsub = q.subject.count.run(show_sql=True)\n", - "%matplotlib inline\n", - "qsub.to_dataframe().to_dict()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4f5cd0f-b990-4096-a91f-c84997019d39", - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.14" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 72ddd9fea448765e86dc53d89f3b7824f7ff7ec3 Mon Sep 17 00:00:00 2001 From: Amanda Charbonneau Date: Thu, 7 Dec 2023 11:42:53 -0500 Subject: [PATCH 13/13] removing stale files and fixing gitignore --- .DS_Store | Bin 14340 -> 0 bytes .gitignore | 84 +- DataSummaries.ipynb | 1754 ------------------------------- Untitled-2.ipynb | 155 --- Untitled.ipynb | 389 ------- Untitled1.ipynb | 457 -------- mutation_tests (1).ipynb | 410 -------- notebooks/.DS_Store | Bin 6148 -> 0 bytes notebooks/BuildingACohort.ipynb | 1679 ----------------------------- notebooks/Untitled.ipynb | 1371 ------------------------ notebooks/Untitled1.ipynb | 411 -------- notebooks/example-tester.ipynb | 1120 -------------------- notebooks/example.ipynb | 825 --------------- notebooks/testa.ipynb | 131 --- notebooks/testing_sql.ipynb | 125 --- 15 files changed, 46 insertions(+), 8865 deletions(-) delete mode 100644 .DS_Store delete mode 100644 DataSummaries.ipynb delete mode 100644 Untitled-2.ipynb delete mode 100644 Untitled.ipynb delete mode 100644 Untitled1.ipynb delete mode 100644 mutation_tests (1).ipynb delete mode 100644 notebooks/.DS_Store delete mode 100644 notebooks/BuildingACohort.ipynb delete mode 100644 notebooks/Untitled.ipynb delete mode 100644 notebooks/Untitled1.ipynb delete mode 100644 notebooks/example-tester.ipynb delete mode 100644 notebooks/example.ipynb delete mode 100644 notebooks/testa.ipynb delete mode 100644 notebooks/testing_sql.ipynb diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index c3b0821acaed85f1b20fb97bba3f46ab7df9127b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14340 zcmeHN3v?9a6~1=}Ff&VF62fM8$zw@?NQg<6gpdG%B@ZCs5hMxWp~g*ig{_<2xU)&b zh*Ep1t*=&Ft<|cvt+raVzOWu2E!O%xJ$gzbg0GGSxwY){%IPz}Iq?8q=Qc{OQSKHcA4+5RP%6bxG$;m&;Z*Nt-Kq2%OQp4(JtS`j}pU^a>=e0K@(i+BSQ_5fv{iaV~qw`krf&0v8y){8t4mCOIEKh)H^_(11mzI z_TZTTeRC{uMvR?p)g#eZBk#C5NMl)Dx0cPuxAn(@k+8lg5Y?$vl-s7v$S;~ZzjEQ? zXNK?VUqGe{4tBU{DYCgaRF5Utd7U z!XahEp6keTx}14tljt})P}Yj&%>zBb(+7gFo%|jJPPcNRpZ@&1h@aiXFKc@g-csyz zJGZfx#%O?!ep%+#CC+(DhfWibHJ|8Mrj#okr_rNTG+oqqlqzRu$k!7HDVkplt5Is5 zU4zDKwKbx0iBj*}+#BiZqbce%gECR=bv8IpW_Moe(_?M@fiU}5kIQ0qZs#gl-ojc} z`27L@NM^3X-qPxHZq;}AqJg#{QaNoO`>#~Lp6dCdqZW3pW`8gii5iU?d7~B#4+cq} z=^+TUmSwrvTPjqFuWYfGWz=ZP5k>sSaLx&3unbnhRRPR+z$`I z6Yvx~4g26%@I1T%e}p&TEjR@4Afk%7=s*|F#MxMh9$bJ+(2Fau5u0!wZp3!%#7)?Z z0X!9VU=X8tCJy5+JR8r)ui~Znb-Wy}#ar<TN9%BGpmHCsm=pfTSr%TDpH?3>bu7sU+k_`mT zkeMQDla1c9ie$0@ZK5I0Eg^A@Hc5!C`DHTMnT#Ze1rN^}+puE?^&2T&13qL32-4DNp7vZm@us{l{p`9!3Ow7l* zSc1o4DVE_vtieTCN9x;z&A1X*V=F0d2dQrtZowYxML(%<1pD!H)Nz0m_#E7gU%)To zCHNIm<;x6J{x04k)G)4`t)^Ov5+D31<+MQEM4p7!>8K|F+r6gRCMTjlC17(54_A8@ zl)0-GE#%;~fWWP~s>T9t7Z|fNP16c)7gBq|j1r)1u{opX;!vjAY?-c3hGtt2+ZdS# z?Tto4p$!o|$Qbhl(tJZE9A;8PF<=E_|d0D|L0SZa= zy$ZL{k!&q%8kwal4r>H)vba`(%dKd838*E~h@$=oN1nlf8C zLpAOb#88i|O4j^}@F=3H##Sq9YZTMHh{`3ldYK%768Ax(+UsgySY~xCY!*!gF4riq z8Iy3exYo!7XbJEpF4W>`ljUS^GaK7kV=}lEdxPKRdKSmHVxe{b9}RZ-^lqsETH$2q zhn;XCTmjd?-LMDlfkz1D_7lXt3a`T(1aWV}`prtKb&!RP3AuNSs9WBEVzr zc>*CfHqn)saWN45RMh+lisTLuApVKe{9iZ$Wr9ODL17s|;4+HdSnSqJioJoyZ%nzn zNwpaShA_+lFv?>%27sT(3-BVmm@D}=xsqRlH{uWQhj3rB@{<3&7<5GGS6&SRPf8;sF-S)HU4@LTELysvxplpu{n@IT5bKQR zVmV%CVK;d{ZR)g4d#>G?Oz&sr+0`-e4>?-F(cO7=dtwB{STTXe>^XLo$3NT!vzmFO zs>Ed4*~hV8Wd`tE#G2DQ*+=p1XEdSzD*ti*fVilap3XR2{3Z#6Q5 z$}^=ksk~b)Vu8O&<>#m+Ea*3>e5tF9n7nLId9}jjVNAlH@@lDTl*+5M>SD$z3@WeI ztILVbbSoy6Z*VoSAZC0jKfa>g0r$aPcpOGZQD24ENIgHG7=_Vx7NwMsYR<>w4Z6OV zt0$xDEfkImy1p5=8gzX-&p7F%s7y_F8zDI(?3dx!c;@LkydJ-U-{YbAUHE`S9aB+v zi#qm2nwpGUvwZH-p&b0+;T6eB+ZXYFj9&da5+Q-->nwNckM*n_&|^`$Lo>9KX|uFI z0Q#T;HW5~aV1Uj8R9gW}lpT-JUICpXvm80A_4BXcLh(Kk!dNke^5(n;a~Lt$L01b? z{~;=)^zB@br0pd4(iMDgJa;Rjy^@TS{^xq*e_Cdcyf&7IM=|b^@IMmp|JbBGY8F!3 zqq6ESLmY^K6r-bM_(}_ zf!ECb*N*`Fr6Pq_i(k==ujZMCtrV zIE7TSfFgrpdIL3w)U=Y`K~-^8T~6>*vD0NCs)~BcwRyn zdnH~K&t*^`Yf(*WRPab@)l9;*r0>ZSaP3J`d7m|t-?A#JnJWLXd~8C0gDa? z_qCo(6cv^18-V8VzyG%mOUvmMNUy;E&-seR|1anN F{|4+ukeC1f diff --git a/.gitignore b/.gitignore index 03e4dc10..cb73d173 100644 --- a/.gitignore +++ b/.gitignore @@ -1,57 +1,65 @@ -# Intellij IDEA -.idea - -# Jupyter Notebooks -.ipynb_checkpoints +# MacOS .DS_Store -# pip install -e generated -cdapython.egg-info # Python -*.pyc +**/*.py[cod] **/__pycache__ +dist/ +__pycache__/ +*$py.class + +# pip install -e generated +*.egg-info/ + +# Intellij +.idea/ -# pytest -.pytest_cache +# Google credentials files +**/GCS-service-account-key.* -#virtualenv -venv -venv8 -#vscode +# Virtual environment files +/venv*/ .vscode +.env -monkeytype.* +# Jupyter Notebooks +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# IPython +profile_default/ +ipython_config.py -dist #sonrqube scanner .scannerwork /notebooks/example-dev.ipynb typeReport -.DS_Store -.pymon -*.Q -*.tsv -*.csv +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage .coverage.* -.nox +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Data Files +**/*.Q +**/*.tsv +**/*.csv + +# Extras + +.pymon +monkeytype.* .pytype .mypy_cache -.coverage -.venv -venv* -venv2 -.ven3 -.venv_11 -ven3 -.nox -.pytype -venv11 -.coverage -.coverage.* config-dev.ini -.DS_Store .ruff_cache -.DS_Store -venv* -.DS_Store diff --git a/DataSummaries.ipynb b/DataSummaries.ipynb deleted file mode 100644 index f4886c9b..00000000 --- a/DataSummaries.ipynb +++ /dev/null @@ -1,1754 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "8a16a9fe", - "metadata": {}, - "source": [ - "# Summarize Search Results" - ] - }, - { - "cell_type": "markdown", - "id": "4f90b531", - "metadata": {}, - "source": [ - "The CDA provides a custom python tool for searching CDA data. [`Q`](usage/#q) (short for Query) offers several ways to search and filter data, and several input modes:\n", - "\n", - "---\n", - "- **Q.()** builds a query that can be used by `run()` or `count()`\n", - "- **Q.run()** returns data for the specified search \n", - "- **Q.count()** returns summary information (counts) data that fit the specified search\n", - "- **columns()** returns entity field names\n", - "- **unique_terms()** returns entity field contents\n", - "\n", - "---\n", - " \n", - "Before we do any work, we needs to import these functions cdapython.\n", - "We're also telling cdapython to report it's version so we can be sure we're using the one we mean to:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5245b7b0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
2022.8.29\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m2022.8\u001b[0m.\u001b[1;36m29\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import Q, columns, unique_terms, query\n", - "print(Q.get_version())\n", - "Q.set_host_url('http://localhost:8080')\n", - "Q.set_default_project_dataset(\"gdc-bq-sample.dev\")" - ] - }, - { - "cell_type": "markdown", - "id": "d538da92", - "metadata": {}, - "source": [ - "
\n", - " \n", - "CDA data comes from three sources:\n", - " \n", - " \n", - "The CDA makes this data searchable in four main endpoints:\n", - "\n", - "
    \n", - "
  • subject: A patient entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subjects privacy.
  • \n", - "
  • researchsubject: A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subjects privacy. This entity plays the role of the case_id in existing data. An individual who participates in 3 studies will have 3 researchsubject IDs
  • \n", - "
  • specimen: Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. Specimens are usually collected as an example of their kind, often for use in some investigation.
  • \n", - "
  • file: A unit of data about subjects, researchsubjects, specimens, or their associated information
  • \n", - "
\n", - " \n", - "And two endpoints that offer deeper information about data in the researchsubject endpoint:\n", - "
    \n", - "
  • diagnosis: A collection of characteristics that describe an abnormal condition of the body as assessed at a point in time. May be used to capture information about neoplastic and non-neoplastic conditions.
  • \n", - "
  • treatment: Represent medication administration or other treatment types.
  • \n", - "
\n", - "Any metadata field can be searched from any endpoint, the only difference between search types is what type of data is returned by default. This means that you can think of the CDA as a really, really enormous spreadsheet full of data. To search this enormous spreadsheet, you'd want select columns, and then filter rows.\n", - "
\n", - "\n", - "\n", - "If you are looking to build a cohort of distinct individuals who meet some criteria, search by `subject`. If you want to build a cohort, but are particularly interested in studies rather than the participates per se, search by `researchsubject`. If you are looking for biosamples that can be ordered or a specific format of information (for e.g. histological slides) start with `specimen`. If you are primarily looking for files you can reuse for your own analysis, start with `file`.\n", - "\n", - "In CDA search, these concepts can also be chained together, so you can look specifically for specimen subjects, or researchsubject diagnoses. In the four 'main' tables, all of the rows will have one or more files associated with them that can be directly found by chaining, as in specimen files. Diagnosis and treatment do not have files directly associated with them and so can only be used to find files in conjunction with the other searches.\n", - "\n", - "In all cases, any search can use any metadata field, the only difference between search types is what type of data you return by default. \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "f76026a3", - "metadata": {}, - "source": [ - "## Getting simple summary data\n", - "\n", - "Let's try a broad search of the CDA to see what information exists about cancers that were first diagnosed in the brain. To run this simple search, we would first construct a query in `Q` and save it to a variable `myquery`. This is the same query we ran in the Basic Search notebook:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "c92a98ba", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "\n", - "\n", - "\n", - "myquery = Q('primary_diagnosis_site = \"brain\"')" - ] - }, - { - "cell_type": "markdown", - "id": "6e855573", - "metadata": { - "tags": [ - "Help!" - ] - }, - "source": [ - "\n", - "
\n", - "

Where did those terms come from?

\n", - " \n", - "If you aren't sure how we knew what terms to put in our search, please refer back to the What search terms are available? notebook. \n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "4e492876", - "metadata": {}, - "source": [ - "### Overall summary\n", - "\n", - "You can get a quick summary of how many unique specimens, treatments, diagnoses, researchsubjects and subjects meet your search criteria by chaining a `count` command into the basic `run` call. " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "c8e3599f", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 15.935 sec 15935 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m15.935\u001b[0m sec \u001b[1;36m15935\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
specimen_count : 39201\n",
-       "
\n" - ], - "text/plain": [ - "specimen_count : \u001b[1;36m39201\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
treatment_count : 2386\n",
-       "
\n" - ], - "text/plain": [ - "treatment_count : \u001b[1;36m2386\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
diagnosis_count : 1756\n",
-       "
\n" - ], - "text/plain": [ - "diagnosis_count : \u001b[1;36m1756\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
mutation_count : 904\n",
-       "
\n" - ], - "text/plain": [ - "mutation_count : \u001b[1;36m904\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
researchsubject_count : 3716\n",
-       "
\n" - ], - "text/plain": [ - "researchsubject_count : \u001b[1;36m3716\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
subject_count : 2384\n",
-       "
\n" - ], - "text/plain": [ - "subject_count : \u001b[1;36m2384\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "myquery.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "d483e944", - "metadata": {}, - "source": [ - "These numbers are how many total rows of data will come back when querying the various endpoints.\n", - "\n", - "\n", - "\n", - "### subject summary\n", - "\n", - "We can also add `count`to the other run calls we did in the Basic Search notebook to get more detailed summaries:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6d9137aa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 33.746 sec 33746 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m33.746\u001b[0m sec \u001b[1;36m33746\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
    total : 2384    \n",
-       "
\n" - ], - "text/plain": [ - " total : \u001b[1;36m2384\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
  files : 4099497   \n",
-       "
\n" - ], - "text/plain": [ - " files : \u001b[1;36m4099497\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
subject_identifier_systemcount
IDC1955
PDC309
GDC1454
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexcount
None748
female653
male980
not reported3
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
racecount
None748
white1311
black or african american96
not reported136
asian33
not allowed to collect25
american indian or alaska native4
Unknown21
other9
native hawaiian or other pacific islander1
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ethnicitycount
None748
not hispanic or latino1285
not reported219
hispanic or latino85
Unknown22
not allowed to collect25
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cause_of_deathcount
None2098
Not Reported200
Cancer Related63
Not Cancer Related9
Infection3
Unknown9
Surgical Complications2
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
with flattened_result as (SELECT results.* EXCEPT(rn) FROM (SELECT ROW_NUMBER() OVER (PARTITION BY Subject.id,     \n",
-       "_subject_identifier.system, _subject_associated_project, _subject_Files) as rn, Subject.id AS subject_id,          \n",
-       "_subject_identifier.system AS subject_identifier_system, _subject_identifier.value AS subject_identifier_value,    \n",
-       "Subject.species AS species, Subject.sex AS sex, Subject.race AS race, Subject.ethnicity AS ethnicity,              \n",
-       "Subject.days_to_birth AS days_to_birth, _subject_associated_project, Subject.vital_status AS vital_status,         \n",
-       "Subject.days_to_death AS days_to_death, Subject.cause_of_death AS cause_of_death, _subject_Files FROM              \n",
-       "gdc-bq-sample.dev.all_Subjects_v3_0_final AS Subject LEFT JOIN UNNEST(Subject.ResearchSubject) AS _ResearchSubject \n",
-       "LEFT JOIN UNNEST(Subject.identifier) AS _subject_identifier LEFT JOIN UNNEST(Subject.subject_associated_project) AS\n",
-       "_subject_associated_project LEFT JOIN UNNEST(Subject.Files) AS _subject_Files WHERE                                \n",
-       "(IFNULL(UPPER(_ResearchSubject.primary_diagnosis_site), '') = UPPER('brain'))) as results WHERE rn = 1) select     \n",
-       "(SELECT COUNT(DISTINCT subject_id) from flattened_result) as total, (SELECT COUNT(DISTINCT _subject_Files) from    \n",
-       "flattened_result) as files, (select ARRAY(select as STRUCT subject_identifier_system, count(distinct subject_id) as\n",
-       "count from flattened_result group by subject_identifier_system)) as subject_identifier_system, (select ARRAY(select\n",
-       "as STRUCT sex, count(distinct subject_id) as count from flattened_result group by sex)) as sex, (select            \n",
-       "ARRAY(select as STRUCT race, count(distinct subject_id) as count from flattened_result group by race)) as race,    \n",
-       "(select ARRAY(select as STRUCT ethnicity, count(distinct subject_id) as count from flattened_result group by       \n",
-       "ethnicity)) as ethnicity, (select ARRAY(select as STRUCT cause_of_death, count(distinct subject_id) as count from  \n",
-       "flattened_result group by cause_of_death)) as cause_of_death                                                       \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;102;217;239;48;2;39;40;34mwith\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mSELECT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mresults\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m*\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mEXCEPT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrn\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mFROM\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mSELECT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mROW_NUMBER\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mOVER\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mPARTITION\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mBY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mid\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m_subject_identifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34msystem\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_associated_project\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_Files\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrn\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mid\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m_subject_identifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34msystem\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_system\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_identifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mvalue\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_value\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mspecies\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mspecies\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdays_to_birth\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdays_to_birth\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_associated_project\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mvital_status\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mvital_status\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdays_to_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdays_to_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_Files\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mFROM\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mgdc\u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m-\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mbq\u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m-\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msample\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mdev\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mall_Subjects_v3_0_final\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mLEFT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mJOIN\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUNNEST\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mResearchSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_ResearchSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;102;217;239;48;2;39;40;34mLEFT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mJOIN\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUNNEST\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34midentifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_identifier\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mLEFT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mJOIN\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUNNEST\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_associated_project\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m_subject_associated_project\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mLEFT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mJOIN\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUNNEST\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mFiles\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mAS\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_Files\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mWHERE\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mIFNULL\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUPPER\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_ResearchSubject\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m.\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mprimary_diagnosis_site\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;230;219;116;48;2;39;40;34m''\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m=\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mUPPER\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;230;219;116;48;2;39;40;34m'brain'\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mresults\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mWHERE\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrn\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;249;38;114;48;2;39;40;34m=\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;174;129;255;48;2;39;40;34m1\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mSELECT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mCOUNT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mDISTINCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mtotal\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mSELECT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mCOUNT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mDISTINCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m_subject_Files\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mfiles\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_system\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\n", - "\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_system\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_identifier_system\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\n", - "\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msex\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mrace\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34methnicity\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mARRAY\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mselect\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mSTRUCT\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m,\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m(\u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mdistinct\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34msubject_id\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mcount\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mfrom\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n", - "\u001b[38;2;248;248;242;48;2;39;40;34mflattened_result\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mgroup\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mby\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m)\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;102;217;239;48;2;39;40;34mas\u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34m \u001b[0m\u001b[38;2;248;248;242;48;2;39;40;34mcause_of_death\u001b[0m\u001b[48;2;39;40;34m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "myquery.subject.count.run(show_sql=True)" - ] - }, - { - "cell_type": "markdown", - "id": "dff2da08", - "metadata": {}, - "source": [ - "Since we save the output as a variable, we need to look at the variable to see the results:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "993479db", - "metadata": {}, - "outputs": [], - "source": [ - "subjectresults" - ] - }, - { - "cell_type": "markdown", - "id": "e7e6d522", - "metadata": {}, - "source": [ - "By default, the results are displayed as a table for easy previewing of the data. Since we queried the `subject` endpoint, our default results tell us `subject` level information, that is, information about unique individuals: their sex, race, age, species, etc. Using counts gives us back a nice pivot table type summary of the countable fields for Subjects. Note that above the table it also tells you the total subject count, as well as how many files are associated with those subjects." - ] - }, - { - "cell_type": "markdown", - "id": "05e52f3f", - "metadata": {}, - "source": [ - "\n", - "---\n", - "\n", - "
\n", - "\n", - "

Subject Field Definitions

\n", - "\n", - "A patient entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subjects privacy.\n", - " \n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of subjects returned.
files The number of files that match this search.
identifier.value(`system`) The identifier for the data provider.
species The taxonomic group (e.g. species) of the subject.
sex The biologic character or quality that distinguishes male and female from one another as expressed by analysis of the person's gonadal, morphologic (internal and external), chromosomal, and hormonal characteristics.
race An arbitrary classification of a taxonomic group that is a division of a species.
ethnicity An individual's self-described social and cultural grouping.
cause_of_death The cause of death, if known
\n", - "\n", - "
\n", - " \n", - "---" - ] - }, - { - "cell_type": "markdown", - "id": "5356bf41", - "metadata": {}, - "source": [ - "This gives you a quick way to assess whether the full search results will have the data fields you require. But if you want to get the underlying data for your own downstream applications, you can also get the raw numbers by calling the zeroth value of the variable:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "869dfd5d", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'NoneType' object is not subscriptable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [6]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msubjectresults\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mTypeError\u001b[0m: 'NoneType' object is not subscriptable" - ] - } - ], - "source": [ - "subjectresults[0]" - ] - }, - { - "cell_type": "markdown", - "id": "73745aa6", - "metadata": {}, - "source": [ - "### researchsubject\n", - "\n", - "If we're interested in what researchsubjects meet our criteria, we can also run our query against the researchsubject endpoint. Lets run it without saving to a variable this time to make it a bit quicker:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "a2d8c874", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: Unrecognized name: _researchsubject_identifier at [1:121]\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: Unrecognized name: _researchsubject_identifier at \u001b[1m[\u001b[0m\u001b[1;92m1:121\u001b[0m\u001b[1m]\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 1.08 sec 1080 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m1.08\u001b[0m sec \u001b[1;36m1080\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.researchsubject.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "4564beee", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "---\n", - "\n", - "
\n", - "\n", - "

ResearchSubject Field Definitions

\n", - "\n", - "A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subjects privacy. This entity plays the role of the case_id in existing data. An individual who participates in 3 studies will have 3 researchsubject IDs\n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of researchsubjects returned.
files The number of files that match this search.
identifier.value(`system`) The identifier for the data provider.
primary_diagnosis_condition The text term used to describe the type of malignant disease.
primary_diagnosis_site The text term used to describe the primary site of disease.
\n", - "
\n", - " \n", - "---" - ] - }, - { - "cell_type": "markdown", - "id": "8a67cf09", - "metadata": {}, - "source": [ - "### diagnosis\n", - "\n", - "The diagnosis endpoint is an extension of the researchsubject endpoint, and returns information about researchsubjects that have a diagnosis that meets our search criteria. :" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "7770d68c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: Unrecognized name: _diagnosis_identifier at [1:115]\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: Unrecognized name: _diagnosis_identifier at \u001b[1m[\u001b[0m\u001b[1;92m1:115\u001b[0m\u001b[1m]\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.942 sec 942 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.942\u001b[0m sec \u001b[1;36m942\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.diagnosis.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "eb73357c", - "metadata": {}, - "source": [ - "---\n", - "\n", - "
\n", - "\n", - "

Diagnosis Field Definitions

\n", - "\n", - "A collection of characteristics that describe an abnormal condition of the body as assessed at a point in time. May be used to capture information about neoplastic and non-neoplastic conditions.\n", - "\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of diagnoses returned.
identifier.value(`system`) The identifier for the data provider.
primary_diagnosis The diagnosis instance that qualified a subject for inclusion on a ResearchProject.
stage The extent of a cancer in the body.
grade The degree of abnormality of cancer cells.
\n", - "\n", - "\n", - "
\n", - " \n", - "---\n" - ] - }, - { - "cell_type": "markdown", - "id": "97442718", - "metadata": {}, - "source": [ - "### treatment\n", - "\n", - "The treatment endpoint is an extension of diagnosis and returns information about treatments undertaken on research subjects that have a given diagnosis that meets our search criteria:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "be1ac64f", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: Unrecognized name: _treatment_identifier at [1:115]\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: Unrecognized name: _treatment_identifier at \u001b[1m[\u001b[0m\u001b[1;92m1:115\u001b[0m\u001b[1m]\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.732 sec 732 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.732\u001b[0m sec \u001b[1;36m732\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.treatment.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "ec5dda7f", - "metadata": {}, - "source": [ - "\n", - "---\n", - "\n", - "
\n", - "\n", - "

Treatment Field Definitions

\n", - "\n", - " Medication administration or other treatment types. A single research subject may have multiple treatments for a single diagnosis, and/or different diagnoses, and different treatments, across different studies\n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of treatments returned.
identifier.value(`system`) The identifier for the data provider.
treatment_type The treatment type including medication/therapeutics or other procedures.
treatment_effectThe effect of a treatment on the diagnosis or tumor.
\n", - " \n", - "\n", - "
\n", - " \n", - "---\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "9c00bec8", - "metadata": {}, - "source": [ - "### specimens\n", - "\n", - "We can use this same query to see what specimens are available for brain tissue at the CDA:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "51960eed", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: Unrecognized name: _specimen_identifier at [1:114]\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: Unrecognized name: _specimen_identifier at \u001b[1m[\u001b[0m\u001b[1;92m1:114\u001b[0m\u001b[1m]\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.766 sec 766 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.766\u001b[0m sec \u001b[1;36m766\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.specimen.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "7f254547", - "metadata": {}, - "source": [ - "Nearly 40,000 specimens with over 50,000 files meet our search criteria! We would typically expect this number to be much larger than our number of subjects or research_subjects. First because studies will often take more than one sample per subject, and second because any given specimen might be aliquoted out to be used in multiple tests. " - ] - }, - { - "cell_type": "markdown", - "id": "b3ed75e5", - "metadata": {}, - "source": [ - "
\n", - "\n", - "

Specimen Field Definitions

\n", - "\n", - "Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. Specimens are usually collected as an example of their kind, often for use in some investigation.\n", - " A given specimen will have only a single subject ID and a single research subject ID\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of specimens returned.
files The number of files that match this search.
identifier.value(`system`) The identifier for the data provider.
primary_disease_type The text term used to describe the type of malignant disease.
source_material_type The general kind of material from which the specimen was derived.
specimen_type The high-level type of the specimen, based on its how it has been derived from the original extracted sample. One of: analyte, aliquot, portion, sample, or slide.
\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "5ccb0f1c", - "metadata": {}, - "source": [ - "### file\n", - "\n", - "The file endpoint returns all files that match our query:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "d6e0639a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: None\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: \u001b[3;35mNone\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.104 sec 104 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.104\u001b[0m sec \u001b[1;36m104\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.file.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "e34044c9", - "metadata": {}, - "source": [ - "There are a huge number of files (4099497) that match our search. Likely we would want to additionally filter the results by file format or data type to get only files we can use. See all the ways you can filter and refine searches with more search terms in the Operators notebook.\n", - "\n", - "\n", - "
\n", - "\n", - "

File Field Definitions

\n", - "\n", - "A file is an information-bearing electronic object that contains a physical embodiment of some information using a particular character encoding.\n", - "\n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
id (`total`) The overall number of files returned.
identifier.value(`system`) The identifier for the data provider.
data_catagoryBroad categorization of the contents of the data file.
data_typeSpecific content type of the data file.
file_formatFormat of the data files.
\n", - "\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "a899b6ca", - "metadata": {}, - "source": [ - "### mutation\n", - "\n", - "The mutation endpoint returns all mutations that match our query:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "900f8b52", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 8.545 sec 8545 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m8.545\u001b[0m sec \u001b[1;36m8545\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
    total : 904     \n",
-       "
\n" - ], - "text/plain": [ - " total : \u001b[1;36m904\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "KeyError", - "evalue": "'ncbi_build'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m~/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/IPython/core/formatters.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[0mtype_pprinters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype_printers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 701\u001b[0m deferred_pprinters=self.deferred_printers)\n\u001b[0;32m--> 702\u001b[0;31m \u001b[0mprinter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpretty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 703\u001b[0m \u001b[0mprinter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflush\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 704\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mstream\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetvalue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/IPython/lib/pretty.py\u001b[0m in \u001b[0;36mpretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dict__\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'__repr__'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 394\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_repr_pprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcycle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 395\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_default_pprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcycle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/IPython/lib/pretty.py\u001b[0m in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m 698\u001b[0m \u001b[0;34m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 699\u001b[0m \u001b[0;31m# Find newlines and replace them with p.break_()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 700\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrepr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 701\u001b[0m \u001b[0mlines\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplitlines\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py\u001b[0m in \u001b[0;36m__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__repr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 89\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_repr_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshow_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow_sql\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/count_result.py\u001b[0m in \u001b[0;36m_repr_value\u001b[0;34m(self, show_value)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 45\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 46\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"null\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: 'ncbi_build'" - ] - } - ], - "source": [ - "myquery.mutation.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "6a4fd46e", - "metadata": {}, - "source": [ - "## Files from a single endpoint (endpoint chaining)\n", - "\n", - "If you want all file formats and data types, but only from a specific endpoint, you can also filter the file results by chaining endpoints together. This will return all the files that match our search AND that are specifically from specimens:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "2cfb2582", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                Http Status: 500\n",
-       "                Error Message: None\n",
-       "                \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m500\u001b[0m\n", - " Error Message: \u001b[3;35mNone\u001b[0m\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 0.117 sec 117 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m0.117\u001b[0m sec \u001b[1;36m117\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "myquery.specimen.file.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "6994e312", - "metadata": {}, - "source": [ - "Learn more about chaining endpoints in the [Chaining endpoints](\"../AdvancedSearch-Chaining\") notebook." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "metadata": { - "interpreter": { - "hash": "5c867d1980d5b66d2bfc8a5903dcee074b4b68f3917a4f27f8a310cab24e9f1b" - } - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Untitled-2.ipynb b/Untitled-2.ipynb deleted file mode 100644 index a1b14401..00000000 --- a/Untitled-2.ipynb +++ /dev/null @@ -1,155 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                            Total execution time: 0\n",
-       "                            min 3.755 sec 3755 ms\n",
-       "                            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m3.755\u001b[0m sec \u001b[1;36m3755\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": [
-       "\u001b[?25l"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[?25h" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[10], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m set_host_url(\u001b[39m\"\u001b[39m\u001b[39mhttp://35.192.60.10:8080\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m set_table_version(\u001b[39m\"\u001b[39m\u001b[39mall_Subjects_v3_0_final\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m a \u001b[39m=\u001b[39m Q(\u001b[39m'\u001b[39;49m\u001b[39msex = \u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m%\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m AND researchsubject_identifier_system = \u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mIDC\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m'\u001b[39;49m)\u001b[39m.\u001b[39;49mrun()\u001b[39m.\u001b[39;49mget_all()\n\u001b[1;32m 7\u001b[0m \u001b[39mprint\u001b[39m(a)\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py:206\u001b[0m, in \u001b[0;36mResult.get_all\u001b[0;34m(self, output, limit, show_bar)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[39m# add this to cast to a subclass of CollectResult\u001b[39;00m\n\u001b[1;32m 202\u001b[0m collect_result: \u001b[39m\"\u001b[39m\u001b[39mCollectResult\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m=\u001b[39m cast(\n\u001b[1;32m 203\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mCollectResult\u001b[39m\u001b[39m\"\u001b[39m, ResultFactory\u001b[39m.\u001b[39mcreate_entity(COLLECT_RESULT, \u001b[39mself\u001b[39m)\n\u001b[1;32m 204\u001b[0m )\n\u001b[0;32m--> 206\u001b[0m \u001b[39mfor\u001b[39;00m index, i \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(iterator):\n\u001b[1;32m 207\u001b[0m \u001b[39mif\u001b[39;00m index \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 208\u001b[0m \u001b[39mcontinue\u001b[39;00m\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Paginator.py:156\u001b[0m, in \u001b[0;36mPaginator.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprogress\u001b[39m.\u001b[39mtasks:\n\u001b[1;32m 155\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprogress\u001b[39m.\u001b[39mremove_task(i\u001b[39m.\u001b[39mid)\n\u001b[0;32m--> 156\u001b[0m \u001b[39mraise\u001b[39;00m e\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Paginator.py:142\u001b[0m, in \u001b[0;36mPaginator.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m\n\u001b[1;32m 140\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcount \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult\u001b[39m.\u001b[39mcount\n\u001b[0;32m--> 142\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_do_next()\n\u001b[1;32m 143\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 144\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mshow_bar:\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Paginator.py:89\u001b[0m, in \u001b[0;36mPaginator._do_next\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult\u001b[39m.\u001b[39mhas_next_page:\n\u001b[1;32m 88\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 89\u001b[0m tmp_result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mresult\u001b[39m.\u001b[39;49mnext_page(limit\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlimit)\n\u001b[1;32m 90\u001b[0m \u001b[39mif\u001b[39;00m tmp_result:\n\u001b[1;32m 91\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult \u001b[39m=\u001b[39m tmp_result\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py:267\u001b[0m, in \u001b[0;36mResult.next_page\u001b[0;34m(self, limit, async_req, pre_stream)\u001b[0m\n\u001b[1;32m 265\u001b[0m _offset: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_offset \u001b[39m+\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_limit\n\u001b[1;32m 266\u001b[0m _limit: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m limit \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_limit\n\u001b[0;32m--> 267\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_result(_offset, _limit, async_req, pre_stream)\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py:294\u001b[0m, in \u001b[0;36mResult._get_result\u001b[0;34m(self, _offset, _limit, async_req, pre_stream)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_get_result\u001b[39m(\n\u001b[1;32m 288\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 289\u001b[0m _offset: \u001b[39mint\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 292\u001b[0m pre_stream: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m 293\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Union[Result, StringResult, ColumnsResult, \u001b[39mNone\u001b[39;00m]:\n\u001b[0;32m--> 294\u001b[0m \u001b[39mreturn\u001b[39;00m get_query_result(\n\u001b[1;32m 295\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m__class__\u001b[39;49m,\n\u001b[1;32m 296\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_api_instance,\n\u001b[1;32m 297\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_query_id,\n\u001b[1;32m 298\u001b[0m _offset,\n\u001b[1;32m 299\u001b[0m _limit,\n\u001b[1;32m 300\u001b[0m async_req,\n\u001b[1;32m 301\u001b[0m pre_stream,\n\u001b[1;32m 302\u001b[0m format_type\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mformat_type,\n\u001b[1;32m 303\u001b[0m )\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/results/result.py:345\u001b[0m, in \u001b[0;36mget_query_result\u001b[0;34m(clz, api_instance, query_id, offset, limit, async_req, pre_stream, show_sql, show_count, format_type)\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(response, ApplyResult):\n\u001b[1;32m 343\u001b[0m response \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mget()\n\u001b[0;32m--> 345\u001b[0m sleep(\u001b[39m2.5\u001b[39;49m)\n\u001b[1;32m 346\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mtotal_row_count \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 347\u001b[0m \u001b[39mreturn\u001b[39;00m clz(\n\u001b[1;32m 348\u001b[0m response,\n\u001b[1;32m 349\u001b[0m query_id,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 355\u001b[0m format_type,\n\u001b[1;32m 356\u001b[0m )\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "from cdapython import Q,set_default_project_dataset,set_host_url,set_table_version\n", - "set_default_project_dataset(\"gdc-bq-sample.dev\")\n", - "set_host_url(\"http://35.192.60.10:8080\")\n", - "set_table_version(\"all_Subjects_v3_0_final\")\n", - "\n", - "a = Q('sex = \"%\" AND researchsubject_identifier_system = \"IDC\"').run().get_all()\n", - "print(a)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "ven3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "eae9c1a3dd8e2f898c643d98dd719bbe12e700aeabe7bf687912cfc443f15d3e" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index cce1743b..00000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,389 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "a93a79c9-352f-41cb-9113-6daadaef1d99", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'2022.7.13'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cdapython import Q,columns,unique_terms\n", - "Q.get_version()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "525765be-da36-4349-8ca7-546fc95c15d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
https://cancerdata.dsde-dev.broadinstitute.org/\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[4;94mhttps://cancerdata.dsde-dev.broadinstitute.org/\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
broad-dsde-dev.cda_dev\n",
-       "
\n" - ], - "text/plain": [ - "broad-dsde-dev.cda_dev\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "Q.set_default_project_dataset(\"broad-dsde-dev.cda_dev\")\n", - "Q.set_host_url(\"https://cancerdata.dsde-dev.broadinstitute.org/\")\n", - "\n", - "print(Q.get_host_url())\n", - "print(Q.get_default_project_dataset())" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "31a2d29e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Total execution time: 4309 ms 4.309 sec 0 min\n",
-       "
\n" - ], - "text/plain": [ - "Total execution time: \u001b[1;36m4309\u001b[0m ms \u001b[1;36m4.309\u001b[0m sec \u001b[1;36m0\u001b[0m min\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - " mylist = Q('ResearchSubject.Specimen.specimen_type= \"slide\" OR file.data_type= \"Slide Image\"').specimen.file.run().to_comma_str(\"identifier\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "051e1ff6-e493-4a39-81ad-4459fef8a980", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Total execution time: 0 min 4.989 sec 4989 ms \n",
-       "
\n" - ], - "text/plain": [ - "Total execution time: \u001b[1;36m0\u001b[0m min \u001b[1;36m4.989\u001b[0m sec \u001b[1;36m4989\u001b[0m ms \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2c67b57005d34d93b3a2b5380d6776d9", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
39864\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m39864\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "n = Q(\"sex = 'male' AND sex != 'null'\").run(limit=1000,async_call=True)\n", - "box = []\n", - "for i in n.paginator(limit=2000):\n", - " box.extend(i)\n", - "print(len(box))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "75d36ce1-b772-4120-a695-68eb39f6e3dd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                            id  \\\n",
-       "0                         2004   \n",
-       "1                         2235   \n",
-       "2                         2377   \n",
-       "3                         2458   \n",
-       "4                         3566   \n",
-       "...                        ...   \n",
-       "39859             TCGA-XK-AAJU   \n",
-       "39860             TCGA-XP-A8T6   \n",
-       "39861             TCGA-ZF-A9R5   \n",
-       "39862  UTRI_SUBJECT_001_000573   \n",
-       "39863  UTRI_SUBJECT_001_000588   \n",
-       "\n",
-       "                                              identifier       species   sex  \\\n",
-       "0                   [{'system': 'GDC', 'value': '2004'}]  homo sapiens  male   \n",
-       "1                   [{'system': 'GDC', 'value': '2235'}]  homo sapiens  male   \n",
-       "2                   [{'system': 'GDC', 'value': '2377'}]  homo sapiens  male   \n",
-       "3                   [{'system': 'GDC', 'value': '2458'}]  homo sapiens  male   \n",
-       "4                   [{'system': 'GDC', 'value': '3566'}]  homo sapiens  male   \n",
-       "...                                                  ...           ...   ...   \n",
-       "39859  [{'system': 'GDC', 'value': 'TCGA-XK-AAJU'}, {...  homo sapiens  male   \n",
-       "39860  [{'system': 'GDC', 'value': 'TCGA-XP-A8T6'}, {...  homo sapiens  male   \n",
-       "39861  [{'system': 'GDC', 'value': 'TCGA-ZF-A9R5'}, {...  homo sapiens  male   \n",
-       "39862  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "39863  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "\n",
-       "                            race               ethnicity  days_to_birth  \\\n",
-       "0                          white  not hispanic or latino            NaN   \n",
-       "1                        Unknown                 Unknown            NaN   \n",
-       "2      black or african american                 Unknown            NaN   \n",
-       "3                        Unknown                 Unknown            NaN   \n",
-       "4                   not reported            not reported            NaN   \n",
-       "...                          ...                     ...            ...   \n",
-       "39859                      white            not reported       -23958.0   \n",
-       "39860  black or african american      hispanic or latino       -19886.0   \n",
-       "39861                      white  not hispanic or latino       -21811.0   \n",
-       "39862               not reported            not reported            NaN   \n",
-       "39863               not reported            not reported            NaN   \n",
-       "\n",
-       "      subject_associated_project  vital_status  days_to_death cause_of_death  \n",
-       "0            [BEATAML1.0-COHORT]          Dead            NaN           None  \n",
-       "1            [BEATAML1.0-COHORT]          Dead            NaN           None  \n",
-       "2            [BEATAML1.0-COHORT]         Alive            NaN           None  \n",
-       "3            [BEATAML1.0-COHORT]          Dead            NaN           None  \n",
-       "4                     [OHSU-CNL]          Dead            NaN           None  \n",
-       "...                          ...           ...            ...            ...  \n",
-       "39859     [TCGA-PRAD, tcga_prad]         Alive            NaN           None  \n",
-       "39860     [tcga_esca, TCGA-ESCA]          Dead          763.0           None  \n",
-       "39861     [TCGA-BLCA, tcga_blca]         Alive            NaN           None  \n",
-       "39862                 [TRIO-CRU]  Not Reported            NaN           None  \n",
-       "39863                 [TRIO-CRU]  Not Reported            NaN           None  \n",
-       "\n",
-       "[39864 rows x 11 columns]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[37;44m id \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2004\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2235\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2377\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2458\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m3566\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39859\u001b[0m\u001b[37;44m TCGA-XK-AAJU \u001b[0m\n", - "\u001b[1;36;44m39860\u001b[0m\u001b[37;44m TCGA-XP-A8T6 \u001b[0m\n", - "\u001b[1;36;44m39861\u001b[0m\u001b[37;44m TCGA-ZF-A9R5 \u001b[0m\n", - "\u001b[1;36;44m39862\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000573 \u001b[0m\n", - "\u001b[1;36;44m39863\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000588 \u001b[0m\n", - "\n", - "\u001b[37;44m identifier species sex \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2004'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2235'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2377'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2458'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'3566'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39859\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-XK-AAJU'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m39860\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-XP-A8T6'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m39861\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-ZF-A9R5'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m39862\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m39863\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\n", - "\u001b[37;44m race ethnicity days_to_birth \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m Unknown Unknown NaN \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m black or african american Unknown NaN \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m Unknown Unknown NaN \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39859\u001b[0m\u001b[37;44m white not reported \u001b[0m\u001b[1;36;44m-23958.0\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39860\u001b[0m\u001b[37;44m black or african american hispanic or latino \u001b[0m\u001b[1;36;44m-19886.0\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39861\u001b[0m\u001b[37;44m white not hispanic or latino \u001b[0m\u001b[1;36;44m-21811.0\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39862\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\u001b[1;36;44m39863\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\n", - "\u001b[37;44m subject_associated_project vital_status days_to_death cause_of_death \u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mOHSU-CNL\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39859\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTCGA-PRAD, tcga_prad\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39860\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mtcga_esca, TCGA-ESCA\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Dead \u001b[0m\u001b[1;36;44m763.0\u001b[0m\u001b[37;44m \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39861\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTCGA-BLCA, tcga_blca\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39862\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m39863\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\n", - "\u001b[1;37;44m[\u001b[0m\u001b[1;36;44m39864\u001b[0m\u001b[37;44m rows x \u001b[0m\u001b[1;36;44m11\u001b[0m\u001b[37;44m columns\u001b[0m\u001b[1;37;44m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import pandas as pd\n", - "df = pd.DataFrame(box)\n", - "print(df, style=\"white on blue\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "284ab4e3-8c39-452c-bcae-2a6d08b9b29e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Untitled1.ipynb b/Untitled1.ipynb deleted file mode 100644 index f72340f3..00000000 --- a/Untitled1.ipynb +++ /dev/null @@ -1,457 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "b974248c-48b3-4233-8211-4677aa63d377", - "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": "/*! jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */\n!function(e,t){\"use strict\";\"object\"==typeof module&&\"object\"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error(\"jQuery requires a window with a document\");return t(e)}:t(e)}(\"undefined\"!=typeof window?window:this,function(C,e){\"use strict\";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return\"function\"==typeof e&&\"number\"!=typeof e.nodeType&&\"function\"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement(\"script\");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+\"\":\"object\"==typeof e||\"function\"==typeof e?n[o.call(e)]||\"object\":typeof e}var f=\"3.6.0\",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&\"length\"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&(\"array\"===n||0===t||\"number\"==typeof t&&0+~]|\"+M+\")\"+M+\"*\"),U=new RegExp(M+\"|>\"),X=new RegExp(F),V=new RegExp(\"^\"+I+\"$\"),G={ID:new RegExp(\"^#(\"+I+\")\"),CLASS:new RegExp(\"^\\\\.(\"+I+\")\"),TAG:new RegExp(\"^(\"+I+\"|[*])\"),ATTR:new RegExp(\"^\"+W),PSEUDO:new RegExp(\"^\"+F),CHILD:new RegExp(\"^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\\\(\"+M+\"*(even|odd|(([+-]|)(\\\\d*)n|)\"+M+\"*(?:([+-]|)\"+M+\"*(\\\\d+)|))\"+M+\"*\\\\)|)\",\"i\"),bool:new RegExp(\"^(?:\"+R+\")$\",\"i\"),needsContext:new RegExp(\"^\"+M+\"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\\\(\"+M+\"*((?:-\\\\d)?\\\\d*)\"+M+\"*\\\\)|)(?=[^-]|$)\",\"i\")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\\d$/i,K=/^[^{]+\\{\\s*\\[native \\w/,Z=/^(?:#([\\w-]+)|(\\w+)|\\.([\\w-]+))$/,ee=/[+~]/,te=new RegExp(\"\\\\\\\\[\\\\da-fA-F]{1,6}\"+M+\"?|\\\\\\\\([^\\\\r\\\\n\\\\f])\",\"g\"),ne=function(e,t){var n=\"0x\"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\\0-\\x1f\\x7f]|^-?\\d)|^-$|[^\\0-\\x1f\\x7f-\\uFFFF\\w-]/g,ie=function(e,t){return t?\"\\0\"===e?\"\\ufffd\":e.slice(0,-1)+\"\\\\\"+e.charCodeAt(e.length-1).toString(16)+\" \":\"\\\\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&\"fieldset\"===e.nodeName.toLowerCase()},{dir:\"parentNode\",next:\"legend\"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],\"string\"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+\" \"]&&(!v||!v.test(t))&&(1!==p||\"object\"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute(\"id\"))?s=s.replace(re,ie):e.setAttribute(\"id\",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?\"#\"+s:\":scope\")+\" \"+xe(l[o]);c=l.join(\",\")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute(\"id\")}}}return g(t.replace($,\"$1\"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+\" \")>b.cacheLength&&delete e[r.shift()],e[t+\" \"]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement(\"fieldset\");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split(\"|\"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return\"input\"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return(\"input\"===t||\"button\"===t)&&e.type===n}}function ge(t){return function(e){return\"form\"in e?e.parentNode&&!1===e.disabled?\"label\"in e?\"label\"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:\"label\"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&\"undefined\"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||\"HTML\")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener(\"unload\",oe,!1):n.attachEvent&&n.attachEvent(\"onunload\",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement(\"div\")),\"undefined\"!=typeof e.querySelectorAll&&!e.querySelectorAll(\":scope fieldset div\").length}),d.attributes=ce(function(e){return e.className=\"i\",!e.getAttribute(\"className\")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment(\"\")),!e.getElementsByTagName(\"*\").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute(\"id\")===t}},b.find.ID=function(e,t){if(\"undefined\"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t=\"undefined\"!=typeof e.getAttributeNode&&e.getAttributeNode(\"id\");return t&&t.value===n}},b.find.ID=function(e,t){if(\"undefined\"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode(\"id\"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode(\"id\"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return\"undefined\"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if(\"*\"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if(\"undefined\"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML=\"\",e.querySelectorAll(\"[msallowcapture^='']\").length&&v.push(\"[*^$]=\"+M+\"*(?:''|\\\"\\\")\"),e.querySelectorAll(\"[selected]\").length||v.push(\"\\\\[\"+M+\"*(?:value|\"+R+\")\"),e.querySelectorAll(\"[id~=\"+S+\"-]\").length||v.push(\"~=\"),(t=C.createElement(\"input\")).setAttribute(\"name\",\"\"),e.appendChild(t),e.querySelectorAll(\"[name='']\").length||v.push(\"\\\\[\"+M+\"*name\"+M+\"*=\"+M+\"*(?:''|\\\"\\\")\"),e.querySelectorAll(\":checked\").length||v.push(\":checked\"),e.querySelectorAll(\"a#\"+S+\"+*\").length||v.push(\".#.+[+~]\"),e.querySelectorAll(\"\\\\\\f\"),v.push(\"[\\\\r\\\\n\\\\f]\")}),ce(function(e){e.innerHTML=\"\";var t=C.createElement(\"input\");t.setAttribute(\"type\",\"hidden\"),e.appendChild(t).setAttribute(\"name\",\"D\"),e.querySelectorAll(\"[name=d]\").length&&v.push(\"name\"+M+\"*[*^$|!~]?=\"),2!==e.querySelectorAll(\":enabled\").length&&v.push(\":enabled\",\":disabled\"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(\":disabled\").length&&v.push(\":enabled\",\":disabled\"),e.querySelectorAll(\"*,:x\"),v.push(\",.*:\")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,\"*\"),c.call(e,\"[s!='']:x\"),s.push(\"!=\",F)}),v=v.length&&new RegExp(v.join(\"|\")),s=s.length&&new RegExp(s.join(\"|\")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+\" \"]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0\":{dir:\"parentNode\",first:!0},\" \":{dir:\"parentNode\"},\"+\":{dir:\"previousSibling\",first:!0},\"~\":{dir:\"previousSibling\"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||\"\").replace(te,ne),\"~=\"===e[2]&&(e[3]=\" \"+e[3]+\" \"),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),\"nth\"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*(\"even\"===e[3]||\"odd\"===e[3])),e[5]=+(e[7]+e[8]||\"odd\"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||\"\":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(\")\",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return\"*\"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+\" \"];return t||(t=new RegExp(\"(^|\"+M+\")\"+e+\"(\"+M+\"|$)\"))&&m(e,function(e){return t.test(\"string\"==typeof e.className&&e.className||\"undefined\"!=typeof e.getAttribute&&e.getAttribute(\"class\")||\"\")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?\"!=\"===r:!r||(t+=\"\",\"=\"===r?t===i:\"!=\"===r?t!==i:\"^=\"===r?i&&0===t.indexOf(i):\"*=\"===r?i&&-1\",\"#\"===e.firstChild.getAttribute(\"href\")})||fe(\"type|href|height|width\",function(e,t,n){if(!n)return e.getAttribute(t,\"type\"===t.toLowerCase()?1:2)}),d.attributes&&ce(function(e){return e.innerHTML=\"\",e.firstChild.setAttribute(\"value\",\"\"),\"\"===e.firstChild.getAttribute(\"value\")})||fe(\"value\",function(e,t,n){if(!n&&\"input\"===e.nodeName.toLowerCase())return e.defaultValue}),ce(function(e){return null==e.getAttribute(\"disabled\")})||fe(R,function(e,t,n){var r;if(!n)return!0===e[t]?t.toLowerCase():(r=e.getAttributeNode(t))&&r.specified?r.value:null}),se}(C);S.find=d,S.expr=d.selectors,S.expr[\":\"]=S.expr.pseudos,S.uniqueSort=S.unique=d.uniqueSort,S.text=d.getText,S.isXMLDoc=d.isXML,S.contains=d.contains,S.escapeSelector=d.escape;var h=function(e,t,n){var r=[],i=void 0!==n;while((e=e[t])&&9!==e.nodeType)if(1===e.nodeType){if(i&&S(e).is(n))break;r.push(e)}return r},T=function(e,t){for(var n=[];e;e=e.nextSibling)1===e.nodeType&&e!==t&&n.push(e);return n},k=S.expr.match.needsContext;function A(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerCase()}var N=/^<([a-z][^\\/\\0>:\\x20\\t\\r\\n\\f]*)[\\x20\\t\\r\\n\\f]*\\/?>(?:<\\/\\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):\"string\"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,\"string\"==typeof e){if(!(r=\"<\"===e[0]&&\">\"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\\x20\\t\\r\\n\\f]*)/i,he=/^$|^module$|\\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement(\"div\")),(fe=E.createElement(\"input\")).setAttribute(\"type\",\"radio\"),fe.setAttribute(\"checked\",\"checked\"),fe.setAttribute(\"name\",\"t\"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML=\"\",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML=\"\",y.option=!!ce.lastChild;var ge={thead:[1,\"\",\"
\"],col:[2,\"\",\"
\"],tr:[2,\"\",\"
\"],td:[3,\"\",\"
\"],_default:[0,\"\",\"\"]};function ve(e,t){var n;return n=\"undefined\"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||\"*\"):\"undefined\"!=typeof e.querySelectorAll?e.querySelectorAll(t||\"*\"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n\",\"\"]);var me=/<|&#?\\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\\s*$/g;function je(e,t){return A(e,\"table\")&&A(11!==t.nodeType?t:t.firstChild,\"tr\")&&S(e).children(\"tbody\")[0]||e}function De(e){return e.type=(null!==e.getAttribute(\"type\"))+\"/\"+e.type,e}function qe(e){return\"true/\"===(e.type||\"\").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute(\"type\"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,\"handle events\"),s)for(n=0,r=s[i].length;n\").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on(\"load error\",i=function(e){r.remove(),i=null,e&&t(\"error\"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\\?(?=&|$)|\\?\\?/;S.ajaxSetup({jsonp:\"callback\",jsonpCallback:function(){var e=zt.pop()||S.expando+\"_\"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter(\"json jsonp\",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?\"url\":\"string\"==typeof e.data&&0===(e.contentType||\"\").indexOf(\"application/x-www-form-urlencoded\")&&Ut.test(e.data)&&\"data\");if(a||\"jsonp\"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,\"$1\"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?\"&\":\"?\")+e.jsonp+\"=\"+r),e.converters[\"script json\"]=function(){return o||S.error(r+\" was not called\"),o[0]},e.dataTypes[0]=\"json\",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),\"script\"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument(\"\").body).innerHTML=\"
\",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return\"string\"!=typeof e?[]:(\"boolean\"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument(\"\")).createElement(\"base\")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(\" \");return-1\").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,\"position\"),c=S(e),f={};\"static\"===l&&(e.style.position=\"relative\"),s=c.offset(),o=S.css(e,\"top\"),u=S.css(e,\"left\"),(\"absolute\"===l||\"fixed\"===l)&&-1<(o+u).indexOf(\"auto\")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),\"using\"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if(\"fixed\"===S.css(r,\"position\"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&\"static\"===S.css(e,\"position\"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,\"borderTopWidth\",!0),i.left+=S.css(e,\"borderLeftWidth\",!0))}return{top:t.top-i.top-S.css(r,\"marginTop\",!0),left:t.left-i.left-S.css(r,\"marginLeft\",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&\"static\"===S.css(e,\"position\"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:\"pageXOffset\",scrollTop:\"pageYOffset\"},function(t,i){var o=\"pageYOffset\"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each([\"top\",\"left\"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+\"px\":t})}),S.each({Height:\"height\",Width:\"width\"},function(a,s){S.each({padding:\"inner\"+a,content:s,\"\":\"outer\"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||\"boolean\"!=typeof e),i=r||(!0===e||!0===t?\"margin\":\"border\");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf(\"outer\")?e[\"inner\"+a]:e.document.documentElement[\"client\"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body[\"scroll\"+a],r[\"scroll\"+a],e.body[\"offset\"+a],r[\"offset\"+a],r[\"client\"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each([\"ajaxStart\",\"ajaxStop\",\"ajaxComplete\",\"ajaxError\",\"ajaxSuccess\",\"ajaxSend\"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 1===arguments.length?this.off(e,\"**\"):this.off(t,e||\"**\",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each(\"blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu\".split(\" \"),function(e,n){S.fn[n]=function(e,t){return 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import Q,unique_terms\n", - "from itables import init_notebook_mode\n", - "\n", - "init_notebook_mode(all_interactive=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1764bcf2-8ecf-45c8-b617-fe239ceb7b95", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
http://35.192.60.10:8080/\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[4;94mhttp://35.192.60.10:8080/\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
gdc-bq-sample.dev\n",
-       "
\n" - ], - "text/plain": [ - "gdc-bq-sample.dev\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "integration_host = \"http://35.192.60.10:8080/\"\n", - "localhost = \"http://localhost:8080\"\n", - "broad_dev = \"https://cancerdata.dsde-dev.broadinstitute.org/\"\n", - "project_in = \"gdc-bq-sample.dev\"\n", - "project_broad_dev = \"broad-dsde-dev.cda_dev\"\n", - "Q.set_default_project_dataset(project_in)\n", - "Q.set_host_url(integration_host)\n", - "\n", - "print(Q.get_host_url())\n", - "print(Q.get_default_project_dataset())\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81058c54-afe6-4592-b5e7-46c61e646ff1", - "metadata": {}, - "outputs": [], - "source": [ - "mylist = Q('ResearchSubject.Specimen.specimen_type= \"slide\" OR file.data_type = \"Slide Image\"').specimen.file.run(filter=\"id\"\n", - ", show_sql=True)\n", - "df = mylist.to_dataframe()\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cf27f129", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "slidequery = Q(\n", - " 'File.data_type = \"Slide Image\" OR ResearchSubject.Specimen.source_material_type = \"Slides\" OR ResearchSubject.Specimen.specimen_type = \"slide\"'\n", - ")\n", - "\n", - "cptacquery = Q(\n", - " 'File.associated_project = \"%cptac%\" OR subject_associated_project = \"%cptac%\" OR ResearchSubject.member_of_research_project = \"%cptac%\" OR ResearchSubject.Specimen.associated_project = \"%cptac%\"'\n", - ")\n", - "\n", - "myquery = slidequery.AND(cptacquery)\n", - "\n", - "\n", - "\n", - "\n", - "import pandas as pd\n", - "from pandas import DataFrame\n", - "t = myquery.specimen.run()\n", - "\n", - "d = t.join_as_str(\"subject_id\")\n", - "b = t.join_as_str(\"primary_disease_type\")\n", - "\n", - "\n", - "\n", - "\n", - "a = DataFrame([d])\n", - "b2 = DataFrame([b])\n", - "\n", - "\n", - "v = pd.merge(a,b2)\n", - "v\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "beaec8e2", - "metadata": {}, - "outputs": [], - "source": [ - "slidequery = Q(\n", - " 'File.data_type = \"Slide Image\" OR ResearchSubject.Specimen.source_material_type = \"Slides\" OR ResearchSubject.Specimen.specimen_type = \"slide\"')\n", - "\n", - "subids = slidequery.subject.run(filter='id', limit=100000).join_as_str(key=\"id\",delimiter=\",\")\n", - "subfiles = Q(f'id IN ({subids})').to_json()\n", - "print(subfiles)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "658895d4-6a85-4224-a610-3cdd53b450d3", - "metadata": {}, - "outputs": [], - "source": [ - "p = mylist.auto_paginator(to_df=True,limit=20000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "244175d4", - "metadata": {}, - "outputs": [], - "source": [ - "p.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1b65a54-92aa-4d76-ab06-d9d8dd18ce33", - "metadata": {}, - "outputs": [], - "source": [ - "p.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5182c3c-1338-445e-922c-40080f784254", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "d = unique_terms(\n", - " \"species\", host=localhost, table=\"gdc-bq-sample.dev\", show_sql=True\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20d0de23-1a78-4bdc-bd28-0af00b6b44c0", - "metadata": {}, - "outputs": [], - "source": [ - "d.to_list()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81f0a38a-3cdf-4330-82bd-850cf2c094d6", - "metadata": {}, - "outputs": [], - "source": [ - "d = Q('File.associated_project = \"%cptac%\"').file.run(limit=2000,async_call=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "78e7ec96", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;225;190;106;48;2;64;176;166mGetting results from database\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Total execution time: 0 min 4.243 sec 4243 ms \n",
-       "
\n" - ], - "text/plain": [ - "Total execution time: \u001b[1;36m0\u001b[0m min \u001b[1;36m4.243\u001b[0m sec \u001b[1;36m4243\u001b[0m ms \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\"white\",\"not reported\",\"white\",\"white\",\"white\",\"white\",\"white\",\"white\",\"white\",\"white\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"asian\",\"not reported\",\"white\",\"white\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"white\",\"white\",\"white\",\"not reported\",\"not reported\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"not reported\",\"white\",\"black or african american\",\"white\",\"not reported\",\"not \n",
-       "reported\",\"not reported\",\"not reported\",\"not reported\",\"white\",\"white\",\"white\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"not reported\",\"None\",\"white\",\"black or african \n",
-       "american\",\"white\",\"white\",\"white\",\"white\",\"not reported\",\"not reported\",\"white\",\"not reported\",\"white\",\"white\",\"not\n",
-       "reported\",\"white\",\"not reported\",\"white\",\"black or african american\",\"white\",\"white\",\"white\",\"not reported\",\"not \n",
-       "reported\",\"white\",\"not reported\",\"white\",\"white\",\"not reported\",\"not reported\",\"not reported\",\"not \n",
-       "reported\",\"white\",\"white\",\"not reported\",\"white\",\"white\",\"white\",\"white\",\"white\",\"not \n",
-       "reported\",\"white\",\"white\",\"white\",\"black or african american\",\"white\",\"white\",\"not reported\",\"black or african \n",
-       "american\",\"black or african american\",\"black or african american\",\"white\",\"not reported\"\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"asian\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"None\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"black or african \u001b[0m\n", - "\u001b[32mamerican\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not\u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not \u001b[0m\n", - "\u001b[32mreported\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m,\u001b[32m\"black or african \u001b[0m\n", - "\u001b[32mamerican\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"black or african american\"\u001b[0m,\u001b[32m\"white\"\u001b[0m,\u001b[32m\"not reported\"\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "\n", - "Q.set_default_project_dataset(\"http://35.192.60.10:8080/\"\n", - "Q.set_host_url(\"gdc-bq-sample.dev\")\n", - "\n", - "d = Q('ResearchSubject.Specimen.specimen_type = \"slide\"').specimen.run(\n", - " filter=\"\"\"\n", - " id:r_id \n", - " species:things\n", - " sex:gender\n", - " race:me\n", - " ethnicity:like_race\n", - " days_to_birth:born\n", - " subject_associated_project\n", - " vital_status \n", - " days_to_death\n", - " cause_of_death \n", - " identifier\n", - " File.label\n", - " File.data_category\n", - " File.data_type\n", - " File.file_format\n", - " File.data_modality\"\"\",\n", - ")\n", - "\n", - "\n", - "d = d.auto_paginator(limit=4000,to_df=True)\n", - "# print(d.to_dataframe())\n", - "\n", - "print(d.join_as_str(key=\"me\", delimiter=\",\"))\n", - " \n", - " \n", - "import pandas as pd \n", - "pd.DataFrame(d[\"r_id\"]).value_counts().plot(kind=\"bar\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "b6a6ef1d-6b1f-4a54-a8e9-3d9ee2983f50", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "97aee5f58b7d42c4b7df2993cde304bf", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "31b7a275-1270-47ec-98f8-0b9ffba5cfbb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/mutation_tests (1).ipynb b/mutation_tests (1).ipynb deleted file mode 100644 index 1af4fa79..00000000 --- a/mutation_tests (1).ipynb +++ /dev/null @@ -1,410 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "2472f1c8-cbda-431a-92f6-a950f6aa500a", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "application/javascript": "/*! jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */\n!function(e,t){\"use strict\";\"object\"==typeof module&&\"object\"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error(\"jQuery requires a window with a document\");return t(e)}:t(e)}(\"undefined\"!=typeof window?window:this,function(C,e){\"use strict\";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return\"function\"==typeof e&&\"number\"!=typeof e.nodeType&&\"function\"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement(\"script\");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+\"\":\"object\"==typeof e||\"function\"==typeof e?n[o.call(e)]||\"object\":typeof e}var f=\"3.6.0\",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&\"length\"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&(\"array\"===n||0===t||\"number\"==typeof t&&0+~]|\"+M+\")\"+M+\"*\"),U=new RegExp(M+\"|>\"),X=new RegExp(F),V=new RegExp(\"^\"+I+\"$\"),G={ID:new RegExp(\"^#(\"+I+\")\"),CLASS:new RegExp(\"^\\\\.(\"+I+\")\"),TAG:new RegExp(\"^(\"+I+\"|[*])\"),ATTR:new RegExp(\"^\"+W),PSEUDO:new RegExp(\"^\"+F),CHILD:new RegExp(\"^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\\\(\"+M+\"*(even|odd|(([+-]|)(\\\\d*)n|)\"+M+\"*(?:([+-]|)\"+M+\"*(\\\\d+)|))\"+M+\"*\\\\)|)\",\"i\"),bool:new RegExp(\"^(?:\"+R+\")$\",\"i\"),needsContext:new RegExp(\"^\"+M+\"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\\\(\"+M+\"*((?:-\\\\d)?\\\\d*)\"+M+\"*\\\\)|)(?=[^-]|$)\",\"i\")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\\d$/i,K=/^[^{]+\\{\\s*\\[native \\w/,Z=/^(?:#([\\w-]+)|(\\w+)|\\.([\\w-]+))$/,ee=/[+~]/,te=new RegExp(\"\\\\\\\\[\\\\da-fA-F]{1,6}\"+M+\"?|\\\\\\\\([^\\\\r\\\\n\\\\f])\",\"g\"),ne=function(e,t){var n=\"0x\"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\\0-\\x1f\\x7f]|^-?\\d)|^-$|[^\\0-\\x1f\\x7f-\\uFFFF\\w-]/g,ie=function(e,t){return t?\"\\0\"===e?\"\\ufffd\":e.slice(0,-1)+\"\\\\\"+e.charCodeAt(e.length-1).toString(16)+\" \":\"\\\\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&\"fieldset\"===e.nodeName.toLowerCase()},{dir:\"parentNode\",next:\"legend\"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],\"string\"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+\" \"]&&(!v||!v.test(t))&&(1!==p||\"object\"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute(\"id\"))?s=s.replace(re,ie):e.setAttribute(\"id\",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?\"#\"+s:\":scope\")+\" \"+xe(l[o]);c=l.join(\",\")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute(\"id\")}}}return g(t.replace($,\"$1\"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+\" \")>b.cacheLength&&delete e[r.shift()],e[t+\" \"]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement(\"fieldset\");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split(\"|\"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return\"input\"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return(\"input\"===t||\"button\"===t)&&e.type===n}}function ge(t){return function(e){return\"form\"in e?e.parentNode&&!1===e.disabled?\"label\"in e?\"label\"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:\"label\"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&\"undefined\"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||\"HTML\")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener(\"unload\",oe,!1):n.attachEvent&&n.attachEvent(\"onunload\",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement(\"div\")),\"undefined\"!=typeof e.querySelectorAll&&!e.querySelectorAll(\":scope fieldset div\").length}),d.attributes=ce(function(e){return e.className=\"i\",!e.getAttribute(\"className\")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment(\"\")),!e.getElementsByTagName(\"*\").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute(\"id\")===t}},b.find.ID=function(e,t){if(\"undefined\"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t=\"undefined\"!=typeof e.getAttributeNode&&e.getAttributeNode(\"id\");return t&&t.value===n}},b.find.ID=function(e,t){if(\"undefined\"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode(\"id\"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode(\"id\"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return\"undefined\"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if(\"*\"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if(\"undefined\"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML=\"\",e.querySelectorAll(\"[msallowcapture^='']\").length&&v.push(\"[*^$]=\"+M+\"*(?:''|\\\"\\\")\"),e.querySelectorAll(\"[selected]\").length||v.push(\"\\\\[\"+M+\"*(?:value|\"+R+\")\"),e.querySelectorAll(\"[id~=\"+S+\"-]\").length||v.push(\"~=\"),(t=C.createElement(\"input\")).setAttribute(\"name\",\"\"),e.appendChild(t),e.querySelectorAll(\"[name='']\").length||v.push(\"\\\\[\"+M+\"*name\"+M+\"*=\"+M+\"*(?:''|\\\"\\\")\"),e.querySelectorAll(\":checked\").length||v.push(\":checked\"),e.querySelectorAll(\"a#\"+S+\"+*\").length||v.push(\".#.+[+~]\"),e.querySelectorAll(\"\\\\\\f\"),v.push(\"[\\\\r\\\\n\\\\f]\")}),ce(function(e){e.innerHTML=\"\";var t=C.createElement(\"input\");t.setAttribute(\"type\",\"hidden\"),e.appendChild(t).setAttribute(\"name\",\"D\"),e.querySelectorAll(\"[name=d]\").length&&v.push(\"name\"+M+\"*[*^$|!~]?=\"),2!==e.querySelectorAll(\":enabled\").length&&v.push(\":enabled\",\":disabled\"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(\":disabled\").length&&v.push(\":enabled\",\":disabled\"),e.querySelectorAll(\"*,:x\"),v.push(\",.*:\")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,\"*\"),c.call(e,\"[s!='']:x\"),s.push(\"!=\",F)}),v=v.length&&new RegExp(v.join(\"|\")),s=s.length&&new RegExp(s.join(\"|\")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+\" \"]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0\":{dir:\"parentNode\",first:!0},\" \":{dir:\"parentNode\"},\"+\":{dir:\"previousSibling\",first:!0},\"~\":{dir:\"previousSibling\"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||\"\").replace(te,ne),\"~=\"===e[2]&&(e[3]=\" \"+e[3]+\" \"),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),\"nth\"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*(\"even\"===e[3]||\"odd\"===e[3])),e[5]=+(e[7]+e[8]||\"odd\"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||\"\":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(\")\",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return\"*\"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+\" \"];return t||(t=new RegExp(\"(^|\"+M+\")\"+e+\"(\"+M+\"|$)\"))&&m(e,function(e){return t.test(\"string\"==typeof e.className&&e.className||\"undefined\"!=typeof e.getAttribute&&e.getAttribute(\"class\")||\"\")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?\"!=\"===r:!r||(t+=\"\",\"=\"===r?t===i:\"!=\"===r?t!==i:\"^=\"===r?i&&0===t.indexOf(i):\"*=\"===r?i&&-1\",\"#\"===e.firstChild.getAttribute(\"href\")})||fe(\"type|href|height|width\",function(e,t,n){if(!n)return e.getAttribute(t,\"type\"===t.toLowerCase()?1:2)}),d.attributes&&ce(function(e){return e.innerHTML=\"\",e.firstChild.setAttribute(\"value\",\"\"),\"\"===e.firstChild.getAttribute(\"value\")})||fe(\"value\",function(e,t,n){if(!n&&\"input\"===e.nodeName.toLowerCase())return e.defaultValue}),ce(function(e){return null==e.getAttribute(\"disabled\")})||fe(R,function(e,t,n){var r;if(!n)return!0===e[t]?t.toLowerCase():(r=e.getAttributeNode(t))&&r.specified?r.value:null}),se}(C);S.find=d,S.expr=d.selectors,S.expr[\":\"]=S.expr.pseudos,S.uniqueSort=S.unique=d.uniqueSort,S.text=d.getText,S.isXMLDoc=d.isXML,S.contains=d.contains,S.escapeSelector=d.escape;var h=function(e,t,n){var r=[],i=void 0!==n;while((e=e[t])&&9!==e.nodeType)if(1===e.nodeType){if(i&&S(e).is(n))break;r.push(e)}return r},T=function(e,t){for(var n=[];e;e=e.nextSibling)1===e.nodeType&&e!==t&&n.push(e);return n},k=S.expr.match.needsContext;function A(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerCase()}var N=/^<([a-z][^\\/\\0>:\\x20\\t\\r\\n\\f]*)[\\x20\\t\\r\\n\\f]*\\/?>(?:<\\/\\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):\"string\"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,\"string\"==typeof e){if(!(r=\"<\"===e[0]&&\">\"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\\x20\\t\\r\\n\\f]*)/i,he=/^$|^module$|\\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement(\"div\")),(fe=E.createElement(\"input\")).setAttribute(\"type\",\"radio\"),fe.setAttribute(\"checked\",\"checked\"),fe.setAttribute(\"name\",\"t\"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML=\"\",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML=\"\",y.option=!!ce.lastChild;var ge={thead:[1,\"\",\"
\"],col:[2,\"\",\"
\"],tr:[2,\"\",\"
\"],td:[3,\"\",\"
\"],_default:[0,\"\",\"\"]};function ve(e,t){var n;return n=\"undefined\"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||\"*\"):\"undefined\"!=typeof e.querySelectorAll?e.querySelectorAll(t||\"*\"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n\",\"\"]);var me=/<|&#?\\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\\s*$/g;function je(e,t){return A(e,\"table\")&&A(11!==t.nodeType?t:t.firstChild,\"tr\")&&S(e).children(\"tbody\")[0]||e}function De(e){return e.type=(null!==e.getAttribute(\"type\"))+\"/\"+e.type,e}function qe(e){return\"true/\"===(e.type||\"\").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute(\"type\"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,\"handle events\"),s)for(n=0,r=s[i].length;n\").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on(\"load error\",i=function(e){r.remove(),i=null,e&&t(\"error\"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\\?(?=&|$)|\\?\\?/;S.ajaxSetup({jsonp:\"callback\",jsonpCallback:function(){var e=zt.pop()||S.expando+\"_\"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter(\"json jsonp\",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?\"url\":\"string\"==typeof e.data&&0===(e.contentType||\"\").indexOf(\"application/x-www-form-urlencoded\")&&Ut.test(e.data)&&\"data\");if(a||\"jsonp\"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,\"$1\"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?\"&\":\"?\")+e.jsonp+\"=\"+r),e.converters[\"script json\"]=function(){return o||S.error(r+\" was not called\"),o[0]},e.dataTypes[0]=\"json\",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),\"script\"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument(\"\").body).innerHTML=\"
\",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return\"string\"!=typeof e?[]:(\"boolean\"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument(\"\")).createElement(\"base\")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(\" \");return-1\").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,\"position\"),c=S(e),f={};\"static\"===l&&(e.style.position=\"relative\"),s=c.offset(),o=S.css(e,\"top\"),u=S.css(e,\"left\"),(\"absolute\"===l||\"fixed\"===l)&&-1<(o+u).indexOf(\"auto\")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),\"using\"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if(\"fixed\"===S.css(r,\"position\"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&\"static\"===S.css(e,\"position\"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,\"borderTopWidth\",!0),i.left+=S.css(e,\"borderLeftWidth\",!0))}return{top:t.top-i.top-S.css(r,\"marginTop\",!0),left:t.left-i.left-S.css(r,\"marginLeft\",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&\"static\"===S.css(e,\"position\"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:\"pageXOffset\",scrollTop:\"pageYOffset\"},function(t,i){var o=\"pageYOffset\"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each([\"top\",\"left\"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+\"px\":t})}),S.each({Height:\"height\",Width:\"width\"},function(a,s){S.each({padding:\"inner\"+a,content:s,\"\":\"outer\"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||\"boolean\"!=typeof e),i=r||(!0===e||!0===t?\"margin\":\"border\");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf(\"outer\")?e[\"inner\"+a]:e.document.documentElement[\"client\"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body[\"scroll\"+a],r[\"scroll\"+a],e.body[\"offset\"+a],r[\"offset\"+a],r[\"client\"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each([\"ajaxStart\",\"ajaxStop\",\"ajaxComplete\",\"ajaxError\",\"ajaxSuccess\",\"ajaxSend\"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 1===arguments.length?this.off(e,\"**\"):this.off(t,e||\"**\",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each(\"blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu\".split(\" \"),function(e,n){S.fn[n]=function(e,t){return 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
2023.6.13\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m2023.6\u001b[0m.\u001b[1;36m13\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import (\n", - " Q, columns, unique_terms)\n", - "import numpy as np\n", - "import pandas as pd\n", - "from itables import init_notebook_mode, show\n", - "init_notebook_mode(all_interactive=True)\n", - "import itables.options as opt\n", - "opt.maxBytes=0\n", - "opt.scrollX=\"200px\"\n", - "opt.scrollCollapse=True\n", - "opt.paging=True\n", - "opt.maxColumns=0\n", - "print(Q.get_version())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d105760b", - "metadata": {}, - "outputs": [], - "source": [ - "query1 = Q('treatment_anatomic_site = \"Cervix\"')\n", - "query2 = Q('primary_diagnosis_site = \"%uter%\" OR primary_diagnosis_site = \"%cerv%\"')\n", - "query3 = Q('primary_diagnosis_condition != \"Adenomas and Adenocarcinomas\"')\n", - "print(query1.OR(query2).AND(query3).to_json())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c17061f6", - "metadata": {}, - "outputs": [], - "source": [ - "print(Q(\"\"\"\n", - "treatment_anatomic_site = \"Cervix\" OR \n", - "primary_diagnosis_site = \"%uter%\" OR primary_diagnosis_site = \"%cerv%\"\n", - "AND primary_diagnosis_condition != \"Adenomas and Adenocarcinomas\"\n", - " \"\"\").to_json())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9698f51c", - "metadata": {}, - "outputs": [], - "source": [ - "columns().to_list(filters=\"TP53\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a7693974-65a0-45d9-a958-38f08cfd0661", - "metadata": { - "scrolled": false, - "tags": [] - }, - "outputs": [], - "source": [ - "unique_terms(\"TP53\", show_counts=True).to_dataframe()" - ] - }, - { - "cell_type": "markdown", - "id": "3d4c94ac", - "metadata": {}, - "source": [ - "##### unique_terms(\"Gene\").to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "c3b4fcc1-cbf8-48bf-8ff0-ccb25a6e318e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m integration_table \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mgdc-bq-sample.dev\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> 4\u001b[0m \u001b[39mprint\u001b[39m(Q(\u001b[39m\"\u001b[39;49m\u001b[39m SYMBOL LIKE \u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mTP53\u001b[39;49m\u001b[39m%\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mspecimen\u001b[39m.\u001b[39;49mrun(show_sql\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,host\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mhttp://localhost:8080\u001b[39;49m\u001b[39m\"\u001b[39;49m,table\u001b[39m=\u001b[39;49mintegration_table))\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/decorators/measure.py:30\u001b[0m, in \u001b[0;36mMeasure.__call__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mkwargs \u001b[39m=\u001b[39m kwargs\n\u001b[1;32m 29\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 30\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 31\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresult\n\u001b[1;32m 32\u001b[0m \u001b[39mfinally\u001b[39;00m:\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Q.py:673\u001b[0m, in \u001b[0;36mQ.run\u001b[0;34m(self, offset, limit, limit, version, host, dry_run, table, async_call, verify, verbose, include, format_type, show_sql)\u001b[0m\n\u001b[1;32m 670\u001b[0m dryClass \u001b[39m=\u001b[39m DryClass(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mapi_response\u001b[39m.\u001b[39mto_dict())\n\u001b[1;32m 671\u001b[0m \u001b[39mreturn\u001b[39;00m dryClass\n\u001b[0;32m--> 673\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__get_query_result(\n\u001b[1;32m 674\u001b[0m api_instance\u001b[39m=\u001b[39;49mapi_instance,\n\u001b[1;32m 675\u001b[0m query_id\u001b[39m=\u001b[39;49mapi_response\u001b[39m.\u001b[39;49mquery_id,\n\u001b[1;32m 676\u001b[0m offset\u001b[39m=\u001b[39;49mPAGEOFFSET,\n\u001b[1;32m 677\u001b[0m limit\u001b[39m=\u001b[39;49mlimit,\n\u001b[1;32m 678\u001b[0m async_req\u001b[39m=\u001b[39;49masync_call,\n\u001b[1;32m 679\u001b[0m show_sql\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_show_sql,\n\u001b[1;32m 680\u001b[0m show_count\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[1;32m 681\u001b[0m format_type\u001b[39m=\u001b[39;49mformat_type,\n\u001b[1;32m 682\u001b[0m )\n\u001b[1;32m 683\u001b[0m \u001b[39mexcept\u001b[39;00m ServiceException \u001b[39mas\u001b[39;00m http_error:\n\u001b[1;32m 684\u001b[0m \u001b[39mif\u001b[39;00m verbose:\n", - "File \u001b[0;32m~/Documents/python/working/cda-python/cdapython/Q.py:567\u001b[0m, in \u001b[0;36mQ.__get_query_result\u001b[0;34m(self, api_instance, query_id, offset, limit, async_req, pre_stream, show_sql, show_count, format_type)\u001b[0m\n\u001b[1;32m 564\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(response, ApplyResult):\n\u001b[1;32m 565\u001b[0m response \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mget()\n\u001b[0;32m--> 567\u001b[0m sleep(\u001b[39m2.5\u001b[39;49m)\n\u001b[1;32m 568\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mtotal_row_count \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 569\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_result_object(\n\u001b[1;32m 570\u001b[0m api_response\u001b[39m=\u001b[39mresponse,\n\u001b[1;32m 571\u001b[0m query_id\u001b[39m=\u001b[39mquery_id,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 577\u001b[0m format_type\u001b[39m=\u001b[39mformat_type,\n\u001b[1;32m 578\u001b[0m )\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "integration_table = \"gdc-bq-sample.dev\"\n", - "\n", - "\n", - "print(Q(\" SYMBOL LIKE 'TP53%'\").specimen.run(show_sql=True,host=\"http://localhost:8080\",table=integration_table))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f27ed7b7-3ff5-40ed-9144-874402ccc7d5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df = results.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b33c502f-cf4c-40e6-a2e2-95ff75b50007", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "len(df)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58563df0", - "metadata": {}, - "outputs": [], - "source": [ - "all_entrez" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a5e9d8e", - "metadata": {}, - "outputs": [], - "source": [ - "all_entrez.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ac9c36b-7307-4e4d-a033-90afc461877c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df_ids = all_entrez.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "decc579d", - "metadata": {}, - "outputs": [], - "source": [ - "df_ids" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79fa3900-a1f2-44c0-82be-6781cc895e5e", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "temp = list(df_ids['Entrez_Gene_Id'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf7dc5b6", - "metadata": {}, - "outputs": [], - "source": [ - "temp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f07fb86b-caa3-4d31-81f7-95648720d21d", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "with open('temp.txt', 'w') as outf: outf.write('\\n'.join(map(str, temp)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "68e55217-84a5-4c96-85b1-2a3786d3578a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "tp53 = Q(\"Gene = 'TP53'\")\n", - "results2 = tp53.researchsubject.run()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9cda16d1-1036-4a71-afc9-3a427da3a70a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df2 = results2.to_dataframe()\n", - "df2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e93fe903-0ffd-489a-ad86-3dc98068331c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "len(df2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c861b958-a106-44a6-af8c-e86b391b7ab5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "all_genes = unique_terms(\"Gene\").get_all()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "61a87143-933b-4987-814d-ffa77e6fc713", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df_genes = all_genes.to_dataframe()\n", - "temp2 = df_genes['Gene']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ba9e0490-27fb-4ba2-8ef3-4e82255f70ce", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "with open('temp2.txt', 'w') as outf: outf.write('\\n'.join(map(str, temp2)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "800ab924", - "metadata": {}, - "outputs": [], - "source": [ - "columns().to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e72430b1-c528-42b0-819a-bf9e1a0810d6", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"subject_associated_project\").to_dataframe().to_csv(\"allprojects.tsv\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/.DS_Store b/notebooks/.DS_Store deleted file mode 100644 index 0949ce4dfb647c9ea5f734ba7a35bdd19c8b7529..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKyG{f#44l9fCn2OOC@Av-oJ2_%PDeq_55NKi(!wDg5_I`1J`ZN>fNo)FPXgJJ zJx**V*^xF60BP}jbqLG=OsT>oXGGLJ8ro{f!enFIqsKK4@I?DJL~GyUYF}`HTdc4D z(Z1pVbKJ216?f=P`_*OFuR7LZUc)sYcO@=~IgruM{0 zUe5m9@=C+%=;cT@GskH&e?DH4&CdR8=}5&fY%mZE3>nz==1k}R34fWyOXVS(~ZnREhGc918uxC4dd>Bl{Ngc$Uw+(y%&e6!lv\n", - "Julia is an oncologist that specializes in female reproductive health. As part of her research, she is interested in using existing data on uterine cancers. If possible, she would like to see multiple datatypes (gross imaging, genomic data, proteomic data, histology) that come from the same patient, so she can look for shared phenotypes to test for their potential as early diagnostics. Julia heard that the Cancer Data Aggregator has made it easy to search across multiple datasets created by NCI, and so has decided to start her search there.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "14e45a9b", - "metadata": {}, - "source": [ - "## Getting Started\n", - "\n", - "The CDA provides a custom python tool for searching CDA data. [`Q`](usage/#q) (short for Query) offers several ways to search and filter data, and several input modes:\n", - "\n", - "---\n", - "- **Q.()** builds a query that can be used by `run()` or `count()`\n", - "- **Q.run()** returns data for the specified search \n", - "- **Q.count()** returns summary information (counts) data that fit the specified search\n", - "- **columns()** returns entity field names\n", - "- **unique_terms()** returns entity field contents\n", - "\n", - "---\n", - "\n", - "Before Julia does any work, she needs to import these functions cdapython.\n", - "She'll also need to import [pandas](https://pandas.pydata.org/) to get nice dataframes.\n", - "Finally, she tells cdapython to report it's version so she can be sure she's using the one she means to:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "a5265d4d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
2022.9.15\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m2022.9\u001b[0m.\u001b[1;36m15\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import Q, columns, unique_terms, query\n", - "import cdapython\n", - "import pandas as pd \n", - "print(cdapython.__version__)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "c577d416", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'https://cancerdata.dsde-dev.broadinstitute.org/'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Q.set_default_project_dataset(\"broad-dsde-dev.cda_dev\")\n", - "Q.set_host_url(\"https://cancerdata.dsde-dev.broadinstitute.org/\")\n", - "Q.get_host_url()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "7a4e1b8b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'broad-dsde-dev.cda_dev'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Q.get_default_project_dataset()" - ] - }, - { - "cell_type": "markdown", - "id": "75eef23e", - "metadata": {}, - "source": [ - "
\n", - " \n", - " \n", - "CDA data comes from three sources:\n", - " \n", - " \n", - "The CDA makes this data searchable in four main endpoints:\n", - "\n", - "
    \n", - "
  • subject: A patient entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subjects privacy.
  • \n", - "
  • researchsubject: A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subjects privacy. This entity plays the role of the case_id in existing data. An individual who participates in 3 studies will have 3 researchsubject IDs
  • \n", - "
  • specimen: Any material taken as a sample from a biological entity (living or dead), or from a physical object or the environment. Specimens are usually collected as an example of their kind, often for use in some investigation.
  • \n", - "
  • file: A unit of data about subjects, researchsubjects, specimens, or their associated information
  • \n", - "
\n", - "and two endpoints that offer deeper information about data in the researchsubject endpoint:\n", - "
    \n", - "
  • diagnosis: A collection of characteristics that describe an abnormal condition of the body as assessed at a point in time. May be used to capture information about neoplastic and non-neoplastic conditions.
  • \n", - "
  • treatment: Represent medication administration or other treatment types.
  • \n", - "
\n", - "Any metadata field can be searched from any endpoint, the only difference between search types is what type of data is returned by default. This means that you can think of the CDA as a really, really enormous spreadsheet full of data. To search this enormous spreadsheet, you'd want select columns, and then filter rows.\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "id": "391bc9a7", - "metadata": {}, - "source": [ - "## Finding Search Terms\n", - "\n", - "\n", - " \n", - " \n", - " Accordingly, to see what search fields are available, Julia starts by using the command `columns`:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ef0dd8e5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n",
-       "    {'AA_MAF': 'Non-reference allele and frequency of existing variant in NHLBI-ESP African American population'},\n",
-       "    {\n",
-       "        'AFR_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined African \n",
-       "population'\n",
-       "    },\n",
-       "    {'ALLELE_NUM': 'Allele number from input; 0 is reference, 1 is first alternate etc.'},\n",
-       "    {\n",
-       "        'AMR_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined American \n",
-       "population'\n",
-       "    },\n",
-       "    {'Allele': 'The variant allele used to calculate the consequence'},\n",
-       "    {\n",
-       "        'Amino_acids': 'Amino acid substitution caused by the mutation. Only given if the variation affects the \n",
-       "protein-coding sequence'\n",
-       "    },\n",
-       "    {'BIOTYPE': 'Biotype of transcript'},\n",
-       "    {\n",
-       "        'CANONICAL': 'A flag (YES) indicating that the VEP-based canonical transcript, the longest translation, was\n",
-       "used for this gene. If not, the value is null'\n",
-       "    },\n",
-       "    {'CCDS': 'The  CCDS identifier for this transcript, where applicable'},\n",
-       "    {\n",
-       "        'CDS_position': 'Relative position of base pair in coding sequence. A - symbol is displayed as the \n",
-       "numerator if the variant does not appear in coding sequence'\n",
-       "    },\n",
-       "    {'CLIN_SIG': 'Clinical significance of variant from dbSNP'},\n",
-       "    {'CONTEXT': 'The reference allele per VCF specs, and its five flanking base pairs'},\n",
-       "    {'COSMIC': 'Overlapping COSMIC variants'},\n",
-       "    {'Center': 'One or more genome sequencing center reporting the variant'},\n",
-       "    {'Chromosome': 'Chromosome, possible values: chr1-22, and chrX'},\n",
-       "    {'Codons': 'The alternative codons with the variant base in upper case'},\n",
-       "    {'Consequence': 'Consequence type of this variant; sequence ontology terms'},\n",
-       "    {'DISTANCE': 'Shortest distance from the variant to transcript'},\n",
-       "    {'DOMAINS': 'The source and identifier of any overlapping protein domains'},\n",
-       "    {\n",
-       "        'EAS_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined East Asian \n",
-       "population'\n",
-       "    },\n",
-       "    {\n",
-       "        'EA_MAF': 'Non-reference allele and frequency of existing variant in NHLBI-ESP European American \n",
-       "population'\n",
-       "    },\n",
-       "    {'ENSP': 'The Ensembl protein identifier of the affected transcript'},\n",
-       "    {\n",
-       "        'EUR_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined European \n",
-       "population'\n",
-       "    },\n",
-       "    {'EXON': 'The exon number (out of total number)'},\n",
-       "    {\n",
-       "        'End_Position': 'Highest numeric genomic position of the reported variant on the genomic reference \n",
-       "sequence. Mutation end coordinate'\n",
-       "    },\n",
-       "    {\n",
-       "        'Entrez_Gene_Id': 'Entrez gene ID (an integer). 0 is used for regions that do not correspond to a gene \n",
-       "region or Ensembl ID'\n",
-       "    },\n",
-       "    {'ExAC_AF': 'Global Allele Frequency from   ExAC'},\n",
-       "    {'ExAC_AF_AFR': 'African/African American Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_AMR': 'American Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_Adj': 'Adjusted Global Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_EAS': 'East Asian Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_FIN': 'Finnish Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_NFE': 'Non-Finnish European Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_OTH': 'Other Allele Frequency from ExAC'},\n",
-       "    {'ExAC_AF_SAS': 'South Asian Allele Frequency from ExAC'},\n",
-       "    {'Existing_variation': 'Known identifier of existing variation'},\n",
-       "    {'Exon_Number': 'The exon number (out of total number)'},\n",
-       "    {\n",
-       "        'FILTER': 'Copied from input VCF. This includes filters implemented directly by the variant caller and \n",
-       "other external software used in the DNA-Seq pipeline. See below for additional details.'\n",
-       "    },\n",
-       "    {'Feature': 'Stable Ensembl ID of feature (transcript, regulatory, motif)'},\n",
-       "    {'Feature_type': 'Type of feature. Currently one of Transcript, RegulatoryFeature, MotifFeature (or blank)'},\n",
-       "    {'GDC_FILTER': 'GDC filters applied universally across all MAFs'},\n",
-       "    {\n",
-       "        'GDC_Validation_Status': 'GDC implementation of validation checks. See notes section (#5) below for \n",
-       "details'\n",
-       "    },\n",
-       "    {'GMAF': 'Non-reference allele and frequency of existing variant in   1000 Genomes'},\n",
-       "    {'Gene': 'The gene symbol. In this table, gene symbol is gene name e.g. ACADVL'},\n",
-       "    {'HGNC_ID': 'Gene identifier from the HUGO Gene Nomenclature Committee if applicable'},\n",
-       "    {'HGVS_OFFSET': 'Indicates by how many bases the HGVS notations for this variant have been shifted'},\n",
-       "    {'HGVSc': 'The coding sequence of the variant in HGVS recommended format'},\n",
-       "    {\n",
-       "        'HGVSp': 'The protein sequence of the variant in HGVS recommended format. p.= signifies no change in the \n",
-       "protein'\n",
-       "    },\n",
-       "    {'HGVSp_Short': 'Same as the HGVSp column, but using 1-letter amino-acid codes'},\n",
-       "    {\n",
-       "        'Hugo_Symbol': 'HUGO symbol for the gene (HUGO symbols are always in all caps). Unknown is used for regions\n",
-       "that do not correspond to a gene'\n",
-       "    },\n",
-       "    {'IMPACT': 'The impact modifier for the consequence type'},\n",
-       "    {'INTRON': 'The intron number (out of total number)'},\n",
-       "    {'MC3_Overlap': 'Indicates whether this region overlaps with an MC3 variant for the same sample pair'},\n",
-       "    {\n",
-       "        'MINIMISED': 'Alleles in this variant have been converted to minimal representation before consequence \n",
-       "calculation (1 or null)'\n",
-       "    },\n",
-       "    {'Matched_Norm_Sample_UUID': 'Unique GDC identifier for normal aliquot (10189 unique)'},\n",
-       "    {\n",
-       "        'Mutation_Status': 'An assessment of the mutation as somatic, germline, LOH, post transcriptional \n",
-       "modification, unknown, or none. The values allowed in this field are constrained by the value in the \n",
-       "Validation_Status field'\n",
-       "    },\n",
-       "    {'NCBI_Build': 'The reference genome used for the alignment (GRCh38)'},\n",
-       "    {\n",
-       "        'One_Consequence': 'The single consequence of the canonical transcript in  sequence ontology terms, eg \n",
-       "missense_variant'\n",
-       "    },\n",
-       "    {'PHENO': 'Indicates if existing variant is associated with a phenotype, disease or trait (0, 1, or null)'},\n",
-       "    {'PICK': \"Indicates if this block of consequence data was picked by VEP's   pick feature (1 or null)\"},\n",
-       "    {'PUBMED': 'Pubmed ID(s) of publications that cite existing variant'},\n",
-       "    {'PolyPhen': 'The PolyPhen prediction and/or score'},\n",
-       "    {\n",
-       "        'Protein_position': 'Relative position of affected amino acid in protein. A - symbol is displayed as the \n",
-       "numerator if the variant does not appear in coding sequence'\n",
-       "    },\n",
-       "    {'RefSeq': 'RefSeq identifier for this transcript'},\n",
-       "    {\n",
-       "        'Reference_Allele': 'The plus strand reference allele at this position. Includes the deleted sequence for a\n",
-       "deletion or - for an insertion'\n",
-       "    },\n",
-       "    {\n",
-       "        'SAS_MAF': 'Non-reference allele and frequency of existing variant in 1000 Genomes combined South Asian \n",
-       "population'\n",
-       "    },\n",
-       "    {'SIFT': 'The   SIFT prediction and/or score, with both given as prediction (score)'},\n",
-       "    {'SOMATIC': 'Somatic status of each ID reported under Existing_variation (0, 1, or null)'},\n",
-       "    {'SWISSPROT': 'UniProtKB/Swiss-Prot accession'},\n",
-       "    {'SYMBOL': 'Eg TP53, LRP1B, etc (same as Hugo_Symbol field except blank instead of Unknown'},\n",
-       "    {\n",
-       "        'SYMBOL_SOURCE': 'The source of the gene symbol, usually HGNC, rarely blank, other sources include \n",
-       "Uniprot_gn, EntrezGene, etc'\n",
-       "    },\n",
-       "    {'Sequencer': 'Instrument used to produce primary sequence data'},\n",
-       "    {\n",
-       "        'Start_Position': 'Lowest numeric position of the reported variant on the genomic reference sequence. \n",
-       "Mutation start coordinate'\n",
-       "    },\n",
-       "    {'Strand': 'Either + or - to denote whether read mapped to the sense (+) or anti-sense (-) strand'},\n",
-       "    {'TRANSCRIPT_STRAND': 'The DNA strand (1 or -1) on which the transcript/feature lies'},\n",
-       "    {'TREMBL': 'UniProtKB/TrEMBL identifier of protein product'},\n",
-       "    {'TSL': 'Transcript support level, which is based on independent RNA analyses'},\n",
-       "    {'Transcript_ID': 'Ensembl ID of the transcript affected by the variant'},\n",
-       "    {'Tumor_Sample_UUID': 'Unique GDC identifier for tumor aliquot (10189 unique)'},\n",
-       "    {\n",
-       "        'Tumor_Seq_Allele1': 'Primary data genotype for tumor sequencing (discovery) allele 1. A - symbol for a \n",
-       "deletion represents a variant. A - symbol for an insertion represents wild-type allele. Novel inserted sequence for\n",
-       "insertion does not include flanking reference bases'\n",
-       "    },\n",
-       "    {\n",
-       "        'Tumor_Seq_Allele2': 'Primary data genotype for tumor sequencing (discovery) allele 2. A - symbol for a \n",
-       "deletion represents a variant. A - symbol for an insertion represents wild-type allele. Novel inserted sequence for\n",
-       "insertion does not include flanking reference bases'\n",
-       "    },\n",
-       "    {\n",
-       "        'Tumor_Validation_Allele1': 'Secondary data from orthogonal technology. Tumor genotyping (validation) for \n",
-       "allele 1. A - symbol for a deletion represents a variant. A - symbol for an insertion represents wild-type allele. \n",
-       "Novel inserted sequence for insertion does not include flanking reference bases'\n",
-       "    },\n",
-       "    {\n",
-       "        'Tumor_Validation_Allele2': 'Secondary data from orthogonal technology. Tumor genotyping (validation) for \n",
-       "allele 2'\n",
-       "    },\n",
-       "    {'UNIPARC': 'UniParc identifier of protein product'},\n",
-       "    {'VARIANT_CLASS': 'Sequence Ontology variant class'},\n",
-       "    {'Validation_Method': 'The assay platforms used for the validation call'},\n",
-       "    {'Variant_Classification': 'Translational effect of variant allele'},\n",
-       "    {\n",
-       "        'Variant_Type': 'Type of mutation. TNP (tri-nucleotide polymorphism) is analogous to DNP (di-nucleotide \n",
-       "polymorphism) but for three consecutive nucleotides. ONP (oligo-nucleotide polymorphism) is analogous to TNP but \n",
-       "for consecutive runs of four or more (SNP, DNP, TNP, ONP, INS, DEL, or Consolidated)'\n",
-       "    },\n",
-       "    {'age_at_diagnosis': 'The age in days of the individual at the time of diagnosis'},\n",
-       "    {'aliquot_barcode_normal': 'TCGA aliquot barcode for the normal control, eg TCGA-12-1089-01A-01D-0517-01'},\n",
-       "    {'aliquot_barcode_tumor': 'TCGA aliquot barcode for the tumor, eg TCGA-12-1089-01A-01D-0517-01'},\n",
-       "    {\n",
-       "        'all_effects': 'A semicolon delimited list of all possible variant effects, sorted by priority \n",
-       "([Symbol,Consequence,HGVSp_Short,Transcript_ID,RefSeq,HGVSc,Impact,Canonical,Sift,PolyPhen,Strand])'\n",
-       "    },\n",
-       "    {\n",
-       "        'anatomical_site': 'Per GDC Dictionary, the text term that represents the name of the primary disease site \n",
-       "of the submitted tumor sample; recommend dropping tumor; biospecimen_anatomic_site.'\n",
-       "    },\n",
-       "    {'byte_size': 'Size of the file in bytes. Maps to dcat:byteSize.'},\n",
-       "    {\n",
-       "        'cDNA_position': 'Relative position of base pair in the cDNA sequence as a fraction. A - symbol is \n",
-       "displayed as the numerator if the variant does not appear in cDNA'\n",
-       "    },\n",
-       "    {\n",
-       "        'callerName': '|-delimited list of mutation caller(s) that agreed on this particular call, always in \n",
-       "alphabetical order: muse, mutect, somaticsniper, varscan'\n",
-       "    },\n",
-       "    {'case_barcode': 'Original TCGA case barcode, eg TCGA-DX-A8BN'},\n",
-       "    {'case_id': 'Unique GDC identifier for the underlying case'},\n",
-       "    {\n",
-       "        'cause_of_death': 'Coded value indicating the circumstance or condition that results in the death of the \n",
-       "subject.'\n",
-       "    },\n",
-       "    {\n",
-       "        'checksum': 'A digit representing the sum of the correct digits in a piece of stored or transmitted digital\n",
-       "data, against which later comparisons can be made to detect errors in the data.'\n",
-       "    },\n",
-       "    {'data_category': 'Broad categorization of the contents of the data file.'},\n",
-       "    {\n",
-       "        'data_modality': 'Data modality describes the biological nature of the information gathered as the result \n",
-       "of an Activity, independent of the technology or methods used to produce the information.'\n",
-       "    },\n",
-       "    {'data_type': 'Specific content type of the data file.'},\n",
-       "    {\n",
-       "        'days_to_birth': \"Number of days between the date used for index and the date from a person's date of birth\n",
-       "represented as a calculated negative number of days.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'days_to_collection': 'The number of days from the index date to either the date a sample was collected for\n",
-       "a specific study or project, or the date a patient underwent a procedure (e.g. surgical resection) yielding a \n",
-       "sample that was eventually used for research.'\n",
-       "    },\n",
-       "    {\n",
-       "        'days_to_death': \"Number of days between the date used for index and the date from a person's date of death\n",
-       "represented as a calculated number of days.\"\n",
-       "    },\n",
-       "    {'days_to_treatment_end': ' The timepoint at which the treatment ended.'},\n",
-       "    {'days_to_treatment_start': 'The timepoint at which the treatment started.'},\n",
-       "    {\n",
-       "        'dbSNP_RS': 'The rs-IDs from the   dbSNP database, novel if not found in any database used, or null if \n",
-       "there is no dbSNP record, but it is found in other databases'\n",
-       "    },\n",
-       "    {\n",
-       "        'dbSNP_Val_Status': 'The dbSNP validation status is reported as a semicolon-separated list of statuses. The\n",
-       "union of all rs-IDs is taken when there are multiple'\n",
-       "    },\n",
-       "    {'dbgap_accession_number': 'The dbgap accession number for the project.'},\n",
-       "    {'derived_from_specimen': 'A source/parent specimen from which this one was directly derived.'},\n",
-       "    {\n",
-       "        'derived_from_subject': 'The Patient/ResearchSubject, or Biologically Derived Materal (e.g. a cell line, \n",
-       "tissue culture, organoid) from which the specimen was directly or indirectly derived.'\n",
-       "    },\n",
-       "    {\n",
-       "        'diagnosis_id': \"The 'logical' identifier of the entity in the repository, e.g. a UUID.  This 'id' is \n",
-       "unique within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'diagnosis_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'diagnosis_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {'drs_uri': 'A string of characters used to identify a resource on the Data Repo Service(DRS).'},\n",
-       "    {\n",
-       "        'ethnicity': \"An individual's self-described social and cultural grouping, specifically whether an \n",
-       "individual describes themselves as Hispanic or Latino. The provided values are based on the categories defined by \n",
-       "the U.S. Office of Management and Business and used by the U.S. Census Bureau.\"\n",
-       "    },\n",
-       "    {'fileName': '|-delimited list of name of underlying MAF file'},\n",
-       "    {'fileUUID': '|-delimited list of unique GDC identifiers for underlying MAF file'},\n",
-       "    {\n",
-       "        'file_associated_project': 'A reference to the Project(s) of which this ResearchSubject is a member. The \n",
-       "associated_project may be embedded using the $ref definition or may be a reference to the id for the Project - or a\n",
-       "URI expressed as a string to an existing entity.'\n",
-       "    },\n",
-       "    {'file_format': 'Format of the data files.'},\n",
-       "    {\n",
-       "        'file_id': \"The 'logical' identifier of the entity in the repository, e.g. a UUID.  This 'id' is unique \n",
-       "within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'file_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'file_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {\n",
-       "        'grade': 'The degree of abnormality of cancer cells, a measure of differentiation, the extent to which \n",
-       "cancer cells are similar in appearance and function to healthy cells of the same tissue type. The degree of \n",
-       "differentiation often relates to the clinical behavior of the particular tumor. Based on the microscopic findings, \n",
-       "tumor grade is commonly described by one of four degrees of severity. Histopathologic grade of a tumor may be used \n",
-       "to plan treatment and estimate the future course, outcome, and overall prognosis of disease. Certain types of \n",
-       "cancers, such as soft tissue sarcoma, primary brain tumors, lymphomas, and breast have special grading systems.'\n",
-       "    },\n",
-       "    {\n",
-       "        'imaging_modality': 'An imaging modality describes the imaging equipment and/or method used to acquire \n",
-       "certain structural or functional information about the body. These include but are not limited to computed \n",
-       "tomography (CT) and magnetic resonance imaging (MRI). Taken from the DICOM standard.'\n",
-       "    },\n",
-       "    {\n",
-       "        'imaging_series': \"The 'logical' identifier of the series or grouping of imaging files in the system of \n",
-       "record which the file is a part of.\"\n",
-       "    },\n",
-       "    {'label': 'Short name or abbreviation for dataset. Maps to rdfs:label.'},\n",
-       "    {'member_of_research_project': 'A reference to the Study(s) of which this ResearchSubject is a member.'},\n",
-       "    {'method_of_diagnosis': 'The method used to confirm the patients malignant diagnosis'},\n",
-       "    {\n",
-       "        'morphology': 'Code that represents the histology of the disease using the third edition of the \n",
-       "International Classification of Diseases for Oncology, published in 2000, used principally in tumor and cancer \n",
-       "registri'\n",
-       "    },\n",
-       "    {'n_depth': 'Read depth across this locus in normal BAM'},\n",
-       "    {'normal_bam_uuid': 'Unique GDC identifier for the underlying normal bam file'},\n",
-       "    {'number_of_cycles': 'The number of treatment cycles the subject received.'},\n",
-       "    {'primary_diagnosis': 'The diagnosis instance that qualified a subject for inclusion on a ResearchProject'},\n",
-       "    {\n",
-       "        'primary_diagnosis_condition': \"The text term used to describe the type of malignant disease, as \n",
-       "categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O).\n",
-       "This attribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'primary_diagnosis_site': \"The text term used to describe the primary site of disease, as categorized by \n",
-       "the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This \n",
-       "categorization groups cases into general categories.  This attribute represents the primary site of disease that \n",
-       "qualified the subject for inclusion on the ResearchProject.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'primary_disease_type': \"The text term used to describe the type of malignant disease, as categorized by \n",
-       "the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O).   This \n",
-       "attribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'project_short_name': 'Project name abbreviation; the program name appended with a project name \n",
-       "abbreviation; eg. TCGA-OV, etc.'\n",
-       "    },\n",
-       "    {\n",
-       "        'race': 'An arbitrary classification of a taxonomic group that is a division of a species. It usually \n",
-       "arises as a consequence of geographical isolation within a species and is characterized by shared heredity, \n",
-       "physical attributes and behavior, and in the case of humans, by common history, nationality, or geographic \n",
-       "distribution. The provided values are based on the categories defined by the U.S. Office of Management and Business\n",
-       "and used by the U.S. Census Bureau.'\n",
-       "    },\n",
-       "    {'researchsubject_Files': 'List of ids of File entities associated with the ResearchSubject'},\n",
-       "    {\n",
-       "        'researchsubject_id': \"The 'logical' identifier of the entity in the system of record, e.g. a UUID.  This \n",
-       "'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For \n",
-       "CDA, this is case_id.\"\n",
-       "    },\n",
-       "    {'researchsubject_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'researchsubject_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {\n",
-       "        'sample_barcode_normal': 'TCGA sample barcode for the normal control, eg TCGA-12-1089-01A. One sample may \n",
-       "have multiple sets of CN segmentations corresponding to multiple aliquots; use GROUP BY appropriately in queries'\n",
-       "    },\n",
-       "    {\n",
-       "        'sample_barcode_tumor': 'TCGA sample barcode for the tumor, eg TCGA-12-1089-01A. One sample may have \n",
-       "multiple sets of CN segmentations corresponding to multiple aliquots; use GROUP BY appropriately in queries'\n",
-       "    },\n",
-       "    {\n",
-       "        'sex': \"The biologic character or quality that distinguishes male and female from one another as expressed \n",
-       "by analysis of the person's gonadal, morphologic (internal and external), chromosomal, and hormonal \n",
-       "characteristics.\"\n",
-       "    },\n",
-       "    {\n",
-       "        'source_material_type': 'The general kind of material from which the specimen was derived, indicating the \n",
-       "physical nature of the source material. '\n",
-       "    },\n",
-       "    {\n",
-       "        'species': 'The taxonomic group (e.g. species) of the patient. For MVP, since taxonomy vocabulary is \n",
-       "consistent between GDC and PDC, using text.  Ultimately, this will be a term returned by the vocabulary service.'\n",
-       "    },\n",
-       "    {'specimen_Files': 'List of ids of File entities associated with the Specimen'},\n",
-       "    {'specimen_associated_project': 'The Project associated with the specimen.'},\n",
-       "    {\n",
-       "        'specimen_id': \"The 'logical' identifier of the entity in the system of record, e.g. a UUID.  This 'id' is \n",
-       "unique within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'specimen_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'specimen_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {\n",
-       "        'specimen_type': 'The high-level type of the specimen, based on its how it has been derived from the \n",
-       "original extracted sample. \\n'\n",
-       "    },\n",
-       "    {'src_vcf_id': '|-delimited list of GDC VCF file identifiers'},\n",
-       "    {\n",
-       "        'stage': 'The extent of a cancer in the body. Staging is usually based on the size of the tumor, whether \n",
-       "lymph nodes contain cancer, and whether the cancer has spread from the original site to other parts of the body.'\n",
-       "    },\n",
-       "    {'subject_Files': 'List of ids of File entities associated with the Patient'},\n",
-       "    {'subject_associated_project': 'The list of Projects associated with the Subject.'},\n",
-       "    {\n",
-       "        'subject_id': \"The 'logical' identifier of the entity in the system of record, e.g. a UUID.  This 'id' is \n",
-       "unique within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'subject_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'subject_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {'t_alt_count': 'Read depth supporting the variant allele in tumor BAM'},\n",
-       "    {'t_depth': 'Read depth across this locus in tumor BAM'},\n",
-       "    {'t_ref_count': 'Read depth supporting the reference allele in tumor BAM'},\n",
-       "    {'therapeutic_agent': 'One or more therapeutic agents as part of this treatment.'},\n",
-       "    {'treatment_anatomic_site': 'The anatomical site that the treatment targets.'},\n",
-       "    {'treatment_effect': 'The effect of a treatment on the diagnosis or tumor.'},\n",
-       "    {'treatment_end_reason': 'The reason the treatment ended.'},\n",
-       "    {\n",
-       "        'treatment_id': \"The 'logical' identifier of the entity in the repository, e.g. a UUID.  This 'id' is \n",
-       "unique within a given system. The identified entity may have a different 'id' in a different system.\"\n",
-       "    },\n",
-       "    {'treatment_identifier_system': 'The system or namespace that defines the identifier.'},\n",
-       "    {'treatment_identifier_value': 'The value of the identifier, as defined by the system.'},\n",
-       "    {'treatment_outcome': 'The final outcome of the treatment.'},\n",
-       "    {'treatment_type': 'The treatment type including medication/therapeutics or other procedures.'},\n",
-       "    {'tumor_bam_uuid': 'Unique GDC identifier for the underlying bam file'},\n",
-       "    {\n",
-       "        'vital_status': 'Coded value indicating the state or condition of being living or deceased; also includes \n",
-       "the case where the vital status is unknown.'\n",
-       "    }\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - " \u001b[1m{\u001b[0m\u001b[32m'AA_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in NHLBI-ESP African American population'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'AFR_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined African \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ALLELE_NUM'\u001b[0m: \u001b[32m'Allele number from input; 0 is reference, 1 is first alternate etc.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'AMR_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined American \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Allele'\u001b[0m: \u001b[32m'The variant allele used to calculate the consequence'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Amino_acids'\u001b[0m: \u001b[32m'Amino acid substitution caused by the mutation. Only given if the variation affects the \u001b[0m\n", - "\u001b[32mprotein-coding sequence'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'BIOTYPE'\u001b[0m: \u001b[32m'Biotype of transcript'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'CANONICAL'\u001b[0m: \u001b[32m'A flag \u001b[0m\u001b[32m(\u001b[0m\u001b[32mYES\u001b[0m\u001b[32m)\u001b[0m\u001b[32m indicating that the VEP-based canonical transcript, the longest translation, was\u001b[0m\n", - "\u001b[32mused for this gene. If not, the value is null'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'CCDS'\u001b[0m: \u001b[32m'The CCDS identifier for this transcript, where applicable'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'CDS_position'\u001b[0m: \u001b[32m'Relative position of base pair in coding sequence. A - symbol is displayed as the \u001b[0m\n", - "\u001b[32mnumerator if the variant does not appear in coding sequence'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'CLIN_SIG'\u001b[0m: \u001b[32m'Clinical significance of variant from dbSNP'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'CONTEXT'\u001b[0m: \u001b[32m'The reference allele per VCF specs, and its five flanking base pairs'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'COSMIC'\u001b[0m: \u001b[32m'Overlapping COSMIC variants'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Center'\u001b[0m: \u001b[32m'One or more genome sequencing center reporting the variant'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Chromosome'\u001b[0m: \u001b[32m'Chromosome, possible values: chr1-22, and chrX'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Codons'\u001b[0m: \u001b[32m'The alternative codons with the variant base in upper case'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Consequence'\u001b[0m: \u001b[32m'Consequence type of this variant; sequence ontology terms'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'DISTANCE'\u001b[0m: \u001b[32m'Shortest distance from the variant to transcript'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'DOMAINS'\u001b[0m: \u001b[32m'The source and identifier of any overlapping protein domains'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'EAS_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined East Asian \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'EA_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in NHLBI-ESP European American \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ENSP'\u001b[0m: \u001b[32m'The Ensembl protein identifier of the affected transcript'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'EUR_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined European \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'EXON'\u001b[0m: \u001b[32m'The exon number \u001b[0m\u001b[32m(\u001b[0m\u001b[32mout of total number\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'End_Position'\u001b[0m: \u001b[32m'Highest numeric genomic position of the reported variant on the genomic reference \u001b[0m\n", - "\u001b[32msequence. Mutation end coordinate'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Entrez_Gene_Id'\u001b[0m: \u001b[32m'Entrez gene ID \u001b[0m\u001b[32m(\u001b[0m\u001b[32man integer\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. 0 is used for regions that do not correspond to a gene \u001b[0m\n", - "\u001b[32mregion or Ensembl ID'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF'\u001b[0m: \u001b[32m'Global Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_AFR'\u001b[0m: \u001b[32m'African/African American Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_AMR'\u001b[0m: \u001b[32m'American Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_Adj'\u001b[0m: \u001b[32m'Adjusted Global Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_EAS'\u001b[0m: \u001b[32m'East Asian Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_FIN'\u001b[0m: \u001b[32m'Finnish Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_NFE'\u001b[0m: \u001b[32m'Non-Finnish European Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_OTH'\u001b[0m: \u001b[32m'Other Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'ExAC_AF_SAS'\u001b[0m: \u001b[32m'South Asian Allele Frequency from ExAC'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Existing_variation'\u001b[0m: \u001b[32m'Known identifier of existing variation'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Exon_Number'\u001b[0m: \u001b[32m'The exon number \u001b[0m\u001b[32m(\u001b[0m\u001b[32mout of total number\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'FILTER'\u001b[0m: \u001b[32m'Copied from input VCF. This includes filters implemented directly by the variant caller and \u001b[0m\n", - "\u001b[32mother external software used in the DNA-Seq pipeline. See below for additional details.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Feature'\u001b[0m: \u001b[32m'Stable Ensembl ID of feature \u001b[0m\u001b[32m(\u001b[0m\u001b[32mtranscript, regulatory, motif\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Feature_type'\u001b[0m: \u001b[32m'Type of feature. Currently one of Transcript, RegulatoryFeature, MotifFeature \u001b[0m\u001b[32m(\u001b[0m\u001b[32mor blank\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'GDC_FILTER'\u001b[0m: \u001b[32m'GDC filters applied universally across all MAFs'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'GDC_Validation_Status'\u001b[0m: \u001b[32m'GDC implementation of validation checks. See notes section \u001b[0m\u001b[32m(\u001b[0m\u001b[32m#5\u001b[0m\u001b[32m)\u001b[0m\u001b[32m below for \u001b[0m\n", - "\u001b[32mdetails'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'GMAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Gene'\u001b[0m: \u001b[32m'The gene symbol. In this table, gene symbol is gene name e.g. ACADVL'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'HGNC_ID'\u001b[0m: \u001b[32m'Gene identifier from the HUGO Gene Nomenclature Committee if applicable'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'HGVS_OFFSET'\u001b[0m: \u001b[32m'Indicates by how many bases the HGVS notations for this variant have been shifted'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'HGVSc'\u001b[0m: \u001b[32m'The coding sequence of the variant in HGVS recommended format'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'HGVSp'\u001b[0m: \u001b[32m'The protein sequence of the variant in HGVS recommended format. p.= signifies no change in the \u001b[0m\n", - "\u001b[32mprotein'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'HGVSp_Short'\u001b[0m: \u001b[32m'Same as the HGVSp column, but using 1-letter amino-acid codes'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Hugo_Symbol'\u001b[0m: \u001b[32m'HUGO symbol for the gene \u001b[0m\u001b[32m(\u001b[0m\u001b[32mHUGO symbols are always in all caps\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Unknown is used for regions\u001b[0m\n", - "\u001b[32mthat do not correspond to a gene'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'IMPACT'\u001b[0m: \u001b[32m'The impact modifier for the consequence type'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'INTRON'\u001b[0m: \u001b[32m'The intron number \u001b[0m\u001b[32m(\u001b[0m\u001b[32mout of total number\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'MC3_Overlap'\u001b[0m: \u001b[32m'Indicates whether this region overlaps with an MC3 variant for the same sample pair'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'MINIMISED'\u001b[0m: \u001b[32m'Alleles in this variant have been converted to minimal representation before consequence \u001b[0m\n", - "\u001b[32mcalculation \u001b[0m\u001b[32m(\u001b[0m\u001b[32m1 or null\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Matched_Norm_Sample_UUID'\u001b[0m: \u001b[32m'Unique GDC identifier for normal aliquot \u001b[0m\u001b[32m(\u001b[0m\u001b[32m10189 unique\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Mutation_Status'\u001b[0m: \u001b[32m'An assessment of the mutation as somatic, germline, LOH, post transcriptional \u001b[0m\n", - "\u001b[32mmodification, unknown, or none. The values allowed in this field are constrained by the value in the \u001b[0m\n", - "\u001b[32mValidation_Status field'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'NCBI_Build'\u001b[0m: \u001b[32m'The reference genome used for the alignment \u001b[0m\u001b[32m(\u001b[0m\u001b[32mGRCh38\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'One_Consequence'\u001b[0m: \u001b[32m'The single consequence of the canonical transcript in sequence ontology terms, eg \u001b[0m\n", - "\u001b[32mmissense_variant'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'PHENO'\u001b[0m: \u001b[32m'Indicates if existing variant is associated with a phenotype, disease or trait \u001b[0m\u001b[32m(\u001b[0m\u001b[32m0, 1, or null\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'PICK'\u001b[0m: \u001b[32m\"Indicates if this block of consequence data was picked by VEP's pick feature \u001b[0m\u001b[32m(\u001b[0m\u001b[32m1 or null\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\"\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'PUBMED'\u001b[0m: \u001b[32m'Pubmed ID\u001b[0m\u001b[32m(\u001b[0m\u001b[32ms\u001b[0m\u001b[32m)\u001b[0m\u001b[32m of publications that cite existing variant'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'PolyPhen'\u001b[0m: \u001b[32m'The PolyPhen prediction and/or score'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Protein_position'\u001b[0m: \u001b[32m'Relative position of affected amino acid in protein. A - symbol is displayed as the \u001b[0m\n", - "\u001b[32mnumerator if the variant does not appear in coding sequence'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'RefSeq'\u001b[0m: \u001b[32m'RefSeq identifier for this transcript'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Reference_Allele'\u001b[0m: \u001b[32m'The plus strand reference allele at this position. Includes the deleted sequence for a\u001b[0m\n", - "\u001b[32mdeletion or - for an insertion'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'SAS_MAF'\u001b[0m: \u001b[32m'Non-reference allele and frequency of existing variant in 1000 Genomes combined South Asian \u001b[0m\n", - "\u001b[32mpopulation'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'SIFT'\u001b[0m: \u001b[32m'The SIFT prediction and/or score, with both given as prediction \u001b[0m\u001b[32m(\u001b[0m\u001b[32mscore\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'SOMATIC'\u001b[0m: \u001b[32m'Somatic status of each ID reported under Existing_variation \u001b[0m\u001b[32m(\u001b[0m\u001b[32m0, 1, or null\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'SWISSPROT'\u001b[0m: \u001b[32m'UniProtKB/Swiss-Prot accession'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'SYMBOL'\u001b[0m: \u001b[32m'Eg TP53, LRP1B, etc \u001b[0m\u001b[32m(\u001b[0m\u001b[32msame as Hugo_Symbol field except blank instead of Unknown'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'SYMBOL_SOURCE'\u001b[0m: \u001b[32m'The source of the gene symbol, usually HGNC, rarely blank, other sources include \u001b[0m\n", - "\u001b[32mUniprot_gn, EntrezGene, etc'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Sequencer'\u001b[0m: \u001b[32m'Instrument used to produce primary sequence data'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Start_Position'\u001b[0m: \u001b[32m'Lowest numeric position of the reported variant on the genomic reference sequence. \u001b[0m\n", - "\u001b[32mMutation start coordinate'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Strand'\u001b[0m: \u001b[32m'Either + or - to denote whether read mapped to the sense \u001b[0m\u001b[32m(\u001b[0m\u001b[32m+\u001b[0m\u001b[32m)\u001b[0m\u001b[32m or anti-sense \u001b[0m\u001b[32m(\u001b[0m\u001b[32m-\u001b[0m\u001b[32m)\u001b[0m\u001b[32m strand'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'TRANSCRIPT_STRAND'\u001b[0m: \u001b[32m'The DNA strand \u001b[0m\u001b[32m(\u001b[0m\u001b[32m1 or -1\u001b[0m\u001b[32m)\u001b[0m\u001b[32m on which the transcript/feature lies'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'TREMBL'\u001b[0m: \u001b[32m'UniProtKB/TrEMBL identifier of protein product'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'TSL'\u001b[0m: \u001b[32m'Transcript support level, which is based on independent RNA analyses'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Transcript_ID'\u001b[0m: \u001b[32m'Ensembl ID of the transcript affected by the variant'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Tumor_Sample_UUID'\u001b[0m: \u001b[32m'Unique GDC identifier for tumor aliquot \u001b[0m\u001b[32m(\u001b[0m\u001b[32m10189 unique\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Tumor_Seq_Allele1'\u001b[0m: \u001b[32m'Primary data genotype for tumor sequencing \u001b[0m\u001b[32m(\u001b[0m\u001b[32mdiscovery\u001b[0m\u001b[32m)\u001b[0m\u001b[32m allele 1. A - symbol for a \u001b[0m\n", - "\u001b[32mdeletion represents a variant. A - symbol for an insertion represents wild-type allele. Novel inserted sequence for\u001b[0m\n", - "\u001b[32minsertion does not include flanking reference bases'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Tumor_Seq_Allele2'\u001b[0m: \u001b[32m'Primary data genotype for tumor sequencing \u001b[0m\u001b[32m(\u001b[0m\u001b[32mdiscovery\u001b[0m\u001b[32m)\u001b[0m\u001b[32m allele 2. A - symbol for a \u001b[0m\n", - "\u001b[32mdeletion represents a variant. A - symbol for an insertion represents wild-type allele. Novel inserted sequence for\u001b[0m\n", - "\u001b[32minsertion does not include flanking reference bases'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Tumor_Validation_Allele1'\u001b[0m: \u001b[32m'Secondary data from orthogonal technology. Tumor genotyping \u001b[0m\u001b[32m(\u001b[0m\u001b[32mvalidation\u001b[0m\u001b[32m)\u001b[0m\u001b[32m for \u001b[0m\n", - "\u001b[32mallele 1. A - symbol for a deletion represents a variant. A - symbol for an insertion represents wild-type allele. \u001b[0m\n", - "\u001b[32mNovel inserted sequence for insertion does not include flanking reference bases'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Tumor_Validation_Allele2'\u001b[0m: \u001b[32m'Secondary data from orthogonal technology. Tumor genotyping \u001b[0m\u001b[32m(\u001b[0m\u001b[32mvalidation\u001b[0m\u001b[32m)\u001b[0m\u001b[32m for \u001b[0m\n", - "\u001b[32mallele 2'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'UNIPARC'\u001b[0m: \u001b[32m'UniParc identifier of protein product'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'VARIANT_CLASS'\u001b[0m: \u001b[32m'Sequence Ontology variant class'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Validation_Method'\u001b[0m: \u001b[32m'The assay platforms used for the validation call'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'Variant_Classification'\u001b[0m: \u001b[32m'Translational effect of variant allele'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'Variant_Type'\u001b[0m: \u001b[32m'Type of mutation. TNP \u001b[0m\u001b[32m(\u001b[0m\u001b[32mtri-nucleotide polymorphism\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is analogous to DNP \u001b[0m\u001b[32m(\u001b[0m\u001b[32mdi-nucleotide \u001b[0m\n", - "\u001b[32mpolymorphism\u001b[0m\u001b[32m)\u001b[0m\u001b[32m but for three consecutive nucleotides. ONP \u001b[0m\u001b[32m(\u001b[0m\u001b[32moligo-nucleotide polymorphism\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is analogous to TNP but \u001b[0m\n", - "\u001b[32mfor consecutive runs of four or more \u001b[0m\u001b[32m(\u001b[0m\u001b[32mSNP, DNP, TNP, ONP, INS, DEL, or Consolidated\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'age_at_diagnosis'\u001b[0m: \u001b[32m'The age in days of the individual at the time of diagnosis'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'aliquot_barcode_normal'\u001b[0m: \u001b[32m'TCGA aliquot barcode for the normal control, eg TCGA-12-1089-01A-01D-0517-01'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'aliquot_barcode_tumor'\u001b[0m: \u001b[32m'TCGA aliquot barcode for the tumor, eg TCGA-12-1089-01A-01D-0517-01'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'all_effects'\u001b[0m: \u001b[32m'A semicolon delimited list of all possible variant effects, sorted by priority \u001b[0m\n", - "\u001b[32m(\u001b[0m\u001b[32m[\u001b[0m\u001b[32mSymbol,Consequence,HGVSp_Short,Transcript_ID,RefSeq,HGVSc,Impact,Canonical,Sift,PolyPhen,Strand\u001b[0m\u001b[32m]\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'anatomical_site'\u001b[0m: \u001b[32m'Per GDC Dictionary, the text term that represents the name of the primary disease site \u001b[0m\n", - "\u001b[32mof the submitted tumor sample; recommend dropping tumor; biospecimen_anatomic_site.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'byte_size'\u001b[0m: \u001b[32m'Size of the file in bytes. Maps to dcat:byteSize.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'cDNA_position'\u001b[0m: \u001b[32m'Relative position of base pair in the cDNA sequence as a fraction. A - symbol is \u001b[0m\n", - "\u001b[32mdisplayed as the numerator if the variant does not appear in cDNA'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'callerName'\u001b[0m: \u001b[32m'|-delimited list of mutation caller\u001b[0m\u001b[32m(\u001b[0m\u001b[32ms\u001b[0m\u001b[32m)\u001b[0m\u001b[32m that agreed on this particular call, always in \u001b[0m\n", - "\u001b[32malphabetical order: muse, mutect, somaticsniper, varscan'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'case_barcode'\u001b[0m: \u001b[32m'Original TCGA case barcode, eg TCGA-DX-A8BN'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'case_id'\u001b[0m: \u001b[32m'Unique GDC identifier for the underlying case'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'cause_of_death'\u001b[0m: \u001b[32m'Coded value indicating the circumstance or condition that results in the death of the \u001b[0m\n", - "\u001b[32msubject.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'checksum'\u001b[0m: \u001b[32m'A digit representing the sum of the correct digits in a piece of stored or transmitted digital\u001b[0m\n", - "\u001b[32mdata, against which later comparisons can be made to detect errors in the data.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'data_category'\u001b[0m: \u001b[32m'Broad categorization of the contents of the data file.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'data_modality'\u001b[0m: \u001b[32m'Data modality describes the biological nature of the information gathered as the result \u001b[0m\n", - "\u001b[32mof an Activity, independent of the technology or methods used to produce the information.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'data_type'\u001b[0m: \u001b[32m'Specific content type of the data file.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'days_to_birth'\u001b[0m: \u001b[32m\"Number of days between the date used for index and the date from a person's date of birth\u001b[0m\n", - "\u001b[32mrepresented as a calculated negative number of days.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'days_to_collection'\u001b[0m: \u001b[32m'The number of days from the index date to either the date a sample was collected for\u001b[0m\n", - "\u001b[32ma specific study or project, or the date a patient underwent a procedure \u001b[0m\u001b[32m(\u001b[0m\u001b[32me.g. surgical resection\u001b[0m\u001b[32m)\u001b[0m\u001b[32m yielding a \u001b[0m\n", - "\u001b[32msample that was eventually used for research.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'days_to_death'\u001b[0m: \u001b[32m\"Number of days between the date used for index and the date from a person's date of death\u001b[0m\n", - "\u001b[32mrepresented as a calculated number of days.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'days_to_treatment_end'\u001b[0m: \u001b[32m' The timepoint at which the treatment ended.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'days_to_treatment_start'\u001b[0m: \u001b[32m'The timepoint at which the treatment started.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dbSNP_RS'\u001b[0m: \u001b[32m'The rs-IDs from the dbSNP database, novel if not found in any database used, or null if \u001b[0m\n", - "\u001b[32mthere is no dbSNP record, but it is found in other databases'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dbSNP_Val_Status'\u001b[0m: \u001b[32m'The dbSNP validation status is reported as a semicolon-separated list of statuses. The\u001b[0m\n", - "\u001b[32munion of all rs-IDs is taken when there are multiple'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'dbgap_accession_number'\u001b[0m: \u001b[32m'The dbgap accession number for the project.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'derived_from_specimen'\u001b[0m: \u001b[32m'A source/parent specimen from which this one was directly derived.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'derived_from_subject'\u001b[0m: \u001b[32m'The Patient/ResearchSubject, or Biologically Derived Materal \u001b[0m\u001b[32m(\u001b[0m\u001b[32me.g. a cell line, \u001b[0m\n", - "\u001b[32mtissue culture, organoid\u001b[0m\u001b[32m)\u001b[0m\u001b[32m from which the specimen was directly or indirectly derived.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'diagnosis_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is \u001b[0m\n", - "\u001b[32munique within a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'diagnosis_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'diagnosis_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'drs_uri'\u001b[0m: \u001b[32m'A string of characters used to identify a resource on the Data Repo Service\u001b[0m\u001b[32m(\u001b[0m\u001b[32mDRS\u001b[0m\u001b[32m)\u001b[0m\u001b[32m.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'ethnicity'\u001b[0m: \u001b[32m\"An individual's self-described social and cultural grouping, specifically whether an \u001b[0m\n", - "\u001b[32mindividual describes themselves as Hispanic or Latino. The provided values are based on the categories defined by \u001b[0m\n", - "\u001b[32mthe U.S. Office of Management and Business and used by the U.S. Census Bureau.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'fileName'\u001b[0m: \u001b[32m'|-delimited list of name of underlying MAF file'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'fileUUID'\u001b[0m: \u001b[32m'|-delimited list of unique GDC identifiers for underlying MAF file'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'file_associated_project'\u001b[0m: \u001b[32m'A reference to the Project\u001b[0m\u001b[32m(\u001b[0m\u001b[32ms\u001b[0m\u001b[32m)\u001b[0m\u001b[32m of which this ResearchSubject is a member. The \u001b[0m\n", - "\u001b[32massociated_project may be embedded using the $ref definition or may be a reference to the id for the Project - or a\u001b[0m\n", - "\u001b[32mURI expressed as a string to an existing entity.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'file_format'\u001b[0m: \u001b[32m'Format of the data files.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'file_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique \u001b[0m\n", - "\u001b[32mwithin a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'file_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'file_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'grade'\u001b[0m: \u001b[32m'The degree of abnormality of cancer cells, a measure of differentiation, the extent to which \u001b[0m\n", - "\u001b[32mcancer cells are similar in appearance and function to healthy cells of the same tissue type. The degree of \u001b[0m\n", - "\u001b[32mdifferentiation often relates to the clinical behavior of the particular tumor. Based on the microscopic findings, \u001b[0m\n", - "\u001b[32mtumor grade is commonly described by one of four degrees of severity. Histopathologic grade of a tumor may be used \u001b[0m\n", - "\u001b[32mto plan treatment and estimate the future course, outcome, and overall prognosis of disease. Certain types of \u001b[0m\n", - "\u001b[32mcancers, such as soft tissue sarcoma, primary brain tumors, lymphomas, and breast have special grading systems.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'imaging_modality'\u001b[0m: \u001b[32m'An imaging modality describes the imaging equipment and/or method used to acquire \u001b[0m\n", - "\u001b[32mcertain structural or functional information about the body. These include but are not limited to computed \u001b[0m\n", - "\u001b[32mtomography \u001b[0m\u001b[32m(\u001b[0m\u001b[32mCT\u001b[0m\u001b[32m)\u001b[0m\u001b[32m and magnetic resonance imaging \u001b[0m\u001b[32m(\u001b[0m\u001b[32mMRI\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Taken from the DICOM standard.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'imaging_series'\u001b[0m: \u001b[32m\"The 'logical' identifier of the series or grouping of imaging files in the system of \u001b[0m\n", - "\u001b[32mrecord which the file is a part of.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'label'\u001b[0m: \u001b[32m'Short name or abbreviation for dataset. Maps to rdfs:label.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'member_of_research_project'\u001b[0m: \u001b[32m'A reference to the Study\u001b[0m\u001b[32m(\u001b[0m\u001b[32ms\u001b[0m\u001b[32m)\u001b[0m\u001b[32m of which this ResearchSubject is a member.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'method_of_diagnosis'\u001b[0m: \u001b[32m'The method used to confirm the patients malignant diagnosis'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'morphology'\u001b[0m: \u001b[32m'Code that represents the histology of the disease using the third edition of the \u001b[0m\n", - "\u001b[32mInternational Classification of Diseases for Oncology, published in 2000, used principally in tumor and cancer \u001b[0m\n", - "\u001b[32mregistri'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'n_depth'\u001b[0m: \u001b[32m'Read depth across this locus in normal BAM'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'normal_bam_uuid'\u001b[0m: \u001b[32m'Unique GDC identifier for the underlying normal bam file'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'number_of_cycles'\u001b[0m: \u001b[32m'The number of treatment cycles the subject received.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'primary_diagnosis'\u001b[0m: \u001b[32m'The diagnosis instance that qualified a subject for inclusion on a ResearchProject'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'primary_diagnosis_condition'\u001b[0m: \u001b[32m\"The text term used to describe the type of malignant disease, as \u001b[0m\n", - "\u001b[32mcategorized by the World Health Organization's \u001b[0m\u001b[32m(\u001b[0m\u001b[32mWHO\u001b[0m\u001b[32m)\u001b[0m\u001b[32m International Classification of Diseases for Oncology \u001b[0m\u001b[32m(\u001b[0m\u001b[32mICD-O\u001b[0m\u001b[32m)\u001b[0m\u001b[32m.\u001b[0m\n", - "\u001b[32mThis attribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'primary_diagnosis_site'\u001b[0m: \u001b[32m\"The text term used to describe the primary site of disease, as categorized by \u001b[0m\n", - "\u001b[32mthe World Health Organization's \u001b[0m\u001b[32m(\u001b[0m\u001b[32mWHO\u001b[0m\u001b[32m)\u001b[0m\u001b[32m International Classification of Diseases for Oncology \u001b[0m\u001b[32m(\u001b[0m\u001b[32mICD-O\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. This \u001b[0m\n", - "\u001b[32mcategorization groups cases into general categories. This attribute represents the primary site of disease that \u001b[0m\n", - "\u001b[32mqualified the subject for inclusion on the ResearchProject.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'primary_disease_type'\u001b[0m: \u001b[32m\"The text term used to describe the type of malignant disease, as categorized by \u001b[0m\n", - "\u001b[32mthe World Health Organization's \u001b[0m\u001b[32m(\u001b[0m\u001b[32mWHO\u001b[0m\u001b[32m)\u001b[0m\u001b[32m International Classification of Diseases for Oncology \u001b[0m\u001b[32m(\u001b[0m\u001b[32mICD-O\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. This \u001b[0m\n", - "\u001b[32mattribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'project_short_name'\u001b[0m: \u001b[32m'Project name abbreviation; the program name appended with a project name \u001b[0m\n", - "\u001b[32mabbreviation; eg. TCGA-OV, etc.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'race'\u001b[0m: \u001b[32m'An arbitrary classification of a taxonomic group that is a division of a species. It usually \u001b[0m\n", - "\u001b[32marises as a consequence of geographical isolation within a species and is characterized by shared heredity, \u001b[0m\n", - "\u001b[32mphysical attributes and behavior, and in the case of humans, by common history, nationality, or geographic \u001b[0m\n", - "\u001b[32mdistribution. The provided values are based on the categories defined by the U.S. Office of Management and Business\u001b[0m\n", - "\u001b[32mand used by the U.S. Census Bureau.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'researchsubject_Files'\u001b[0m: \u001b[32m'List of ids of File entities associated with the ResearchSubject'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'researchsubject_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the system of record, e.g. a UUID. This \u001b[0m\n", - "\u001b[32m'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For \u001b[0m\n", - "\u001b[32mCDA, this is case_id.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'researchsubject_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'researchsubject_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'sample_barcode_normal'\u001b[0m: \u001b[32m'TCGA sample barcode for the normal control, eg TCGA-12-1089-01A. One sample may \u001b[0m\n", - "\u001b[32mhave multiple sets of CN segmentations corresponding to multiple aliquots; use GROUP BY appropriately in queries'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'sample_barcode_tumor'\u001b[0m: \u001b[32m'TCGA sample barcode for the tumor, eg TCGA-12-1089-01A. One sample may have \u001b[0m\n", - "\u001b[32mmultiple sets of CN segmentations corresponding to multiple aliquots; use GROUP BY appropriately in queries'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'sex'\u001b[0m: \u001b[32m\"The biologic character or quality that distinguishes male and female from one another as expressed \u001b[0m\n", - "\u001b[32mby analysis of the person's gonadal, morphologic \u001b[0m\u001b[32m(\u001b[0m\u001b[32minternal and external\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, chromosomal, and hormonal \u001b[0m\n", - "\u001b[32mcharacteristics.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'source_material_type'\u001b[0m: \u001b[32m'The general kind of material from which the specimen was derived, indicating the \u001b[0m\n", - "\u001b[32mphysical nature of the source material. '\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'species'\u001b[0m: \u001b[32m'The taxonomic group \u001b[0m\u001b[32m(\u001b[0m\u001b[32me.g. species\u001b[0m\u001b[32m)\u001b[0m\u001b[32m of the patient. For MVP, since taxonomy vocabulary is \u001b[0m\n", - "\u001b[32mconsistent between GDC and PDC, using text. Ultimately, this will be a term returned by the vocabulary service.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'specimen_Files'\u001b[0m: \u001b[32m'List of ids of File entities associated with the Specimen'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'specimen_associated_project'\u001b[0m: \u001b[32m'The Project associated with the specimen.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'specimen_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is \u001b[0m\n", - "\u001b[32munique within a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'specimen_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'specimen_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'specimen_type'\u001b[0m: \u001b[32m'The high-level type of the specimen, based on its how it has been derived from the \u001b[0m\n", - "\u001b[32moriginal extracted sample. \\n'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'src_vcf_id'\u001b[0m: \u001b[32m'|-delimited list of GDC VCF file identifiers'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'stage'\u001b[0m: \u001b[32m'The extent of a cancer in the body. Staging is usually based on the size of the tumor, whether \u001b[0m\n", - "\u001b[32mlymph nodes contain cancer, and whether the cancer has spread from the original site to other parts of the body.'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'subject_Files'\u001b[0m: \u001b[32m'List of ids of File entities associated with the Patient'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'subject_associated_project'\u001b[0m: \u001b[32m'The list of Projects associated with the Subject.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'subject_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is \u001b[0m\n", - "\u001b[32munique within a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'subject_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'subject_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m't_alt_count'\u001b[0m: \u001b[32m'Read depth supporting the variant allele in tumor BAM'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m't_depth'\u001b[0m: \u001b[32m'Read depth across this locus in tumor BAM'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m't_ref_count'\u001b[0m: \u001b[32m'Read depth supporting the reference allele in tumor BAM'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'therapeutic_agent'\u001b[0m: \u001b[32m'One or more therapeutic agents as part of this treatment.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_anatomic_site'\u001b[0m: \u001b[32m'The anatomical site that the treatment targets.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_effect'\u001b[0m: \u001b[32m'The effect of a treatment on the diagnosis or tumor.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_end_reason'\u001b[0m: \u001b[32m'The reason the treatment ended.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'treatment_id'\u001b[0m: \u001b[32m\"The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is \u001b[0m\n", - "\u001b[32munique within a given system. The identified entity may have a different 'id' in a different system.\"\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_identifier_system'\u001b[0m: \u001b[32m'The system or namespace that defines the identifier.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_identifier_value'\u001b[0m: \u001b[32m'The value of the identifier, as defined by the system.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_outcome'\u001b[0m: \u001b[32m'The final outcome of the treatment.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'treatment_type'\u001b[0m: \u001b[32m'The treatment type including medication/therapeutics or other procedures.'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'tumor_bam_uuid'\u001b[0m: \u001b[32m'Unique GDC identifier for the underlying bam file'\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'vital_status'\u001b[0m: \u001b[32m'Coded value indicating the state or condition of being living or deceased; also includes \u001b[0m\n", - "\u001b[32mthe case where the vital status is unknown.'\u001b[0m\n", - " \u001b[1m}\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(columns().to_list())" - ] - }, - { - "cell_type": "markdown", - "id": "bd05eba2", - "metadata": {}, - "source": [ - "\n", - " \n", - " \n", - "There are a lot of columns in the CDA data, but Julia is most interested in diagnosis data, so she filters the list to only those:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "536970c4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'age_at_diagnosis': 'The age in days of the individual at the time of diagnosis'},\n", - " {'diagnosis_id': \"The 'logical' identifier of the entity in the repository, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.\"},\n", - " {'diagnosis_identifier_system': 'The system or namespace that defines the identifier.'},\n", - " {'diagnosis_identifier_value': 'The value of the identifier, as defined by the system.'},\n", - " {'method_of_diagnosis': 'The method used to confirm the patients malignant diagnosis'},\n", - " {'primary_diagnosis': 'The diagnosis instance that qualified a subject for inclusion on a ResearchProject'},\n", - " {'primary_diagnosis_condition': \"The text term used to describe the type of malignant disease, as categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This attribute represents the disease that qualified the subject for inclusion on the ResearchProject.\"},\n", - " {'primary_diagnosis_site': \"The text term used to describe the primary site of disease, as categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This categorization groups cases into general categories. This attribute represents the primary site of disease that qualified the subject for inclusion on the ResearchProject.\"}]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "columns().to_list(filters=\"diagnosis\")" - ] - }, - { - "cell_type": "markdown", - "id": "a63b4cf0", - "metadata": {}, - "source": [ - "
\n", - "\n", - "To search the CDA, a user also needs to know what search terms are available. Each column will contain a huge amount of data, so retreiving all of the rows would be overwhelming. Instead, the CDA has a `unique_terms()` function that will return all of the unique values that populate the requested column. Like `columns`, `unique_terms` defaults to giving us an overview of the results, and can be filtered.\n", - " \n", - "
\n", - "\n", - "\n", - " \n", - "Since Julia is interested specificially in uterine cancers, she uses the `unique_terms` function to see what data is available for 'ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site' and 'ResearchSubject.primary_diagnosis_site' to see if 'uterine' appears:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4527dde5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n",
-       "            Http Status: 400\n",
-       "            Error Message: Column ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site does not exist\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Http Status: \u001b[1;36m400\u001b[0m\n", - " Error Message: Column ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site does not exist\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "AttributeError", - "evalue": "'NoneType' object has no attribute 'to_list'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/tmp/ipykernel_74/2639024179.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0munique_terms\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'to_list'" - ] - } - ], - "source": [ - "unique_terms(\"ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site\").to_list()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "740e5955", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list()" - ] - }, - { - "cell_type": "markdown", - "id": "b005036b", - "metadata": {}, - "source": [ - "
\n", - " \n", - "CDA makes multiple datasets searchable from a common interface, but does not harmonize the data. This means that researchers should review all the terms in a column, and not just choose the first one that fits, as there may be other similar terms available as well.\n", - " \n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "73e6b8dc", - "metadata": {}, - "source": [ - "\n", - " \n", - "Julia sees that \"treatment_anatomic_site\" does not have 'Uterine', but does have 'Cervix'. She also notes that both 'Uterus' and 'Uterus, NOS' are listed in the \"primary_diagnosis_site\" results. As she was initially looking for \"uterine\", Julia decides to expand her search a bit to account for variable naming schemes. So, she runs a fuzzy match filter on the \"ResearchSubject.primary_diagnosis_site\" for 'uter' as that should cover all variants:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "31064125", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(filters=\"uter\")" - ] - }, - { - "cell_type": "markdown", - "id": "9311a49e", - "metadata": {}, - "source": [ - "\n", - " \n", - "Just to be sure, Julia also searches for any other instances of \"cervix\":" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2038a8cf", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(filters=\"cerv\")" - ] - }, - { - "cell_type": "markdown", - "id": "29c4de58", - "metadata": {}, - "source": [ - "## Building a Query\n", - "\n", - "\n", - " \n", - "With all her likely terms found, Julia begins to create a search that will get data for all of her terms. She does this by writing a series of `Q` statements that define what rows should be returned from each column. For the \"treatment_anatomic_site\", only one term is of interest, so she uses the `=` operator to get only exact matches:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "951fcc8f", - "metadata": {}, - "outputs": [], - "source": [ - "Tsite = Q('ResearchSubject.Diagnosis.Treatment.treatment_anatomic_site = \"Cervix\"')" - ] - }, - { - "cell_type": "markdown", - "id": "12cb5f72", - "metadata": {}, - "source": [ - "\n", - " \n", - "However, for \"primary_diagnosis_site\", Julia has several terms she wants to search with. Luckily, `Q` also can run fuzzy searches. It can also search more than one term at a time, so Julia writes one big `Q` statement to grab everything that is either 'uter' or 'cerv':" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36cfd8a4", - "metadata": {}, - "outputs": [], - "source": [ - "Dsite = Q('ResearchSubject.primary_diagnosis_site = \"%uter%\" OR ResearchSubject.primary_diagnosis_site = \"%cerv%\"')" - ] - }, - { - "cell_type": "markdown", - "id": "349af6f2", - "metadata": {}, - "source": [ - "\n", - " \n", - "Finally, Julia adds her two queries together into one large one:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f5f9e4f", - "metadata": {}, - "outputs": [], - "source": [ - "ALLDATA = Tsite.OR(Dsite)" - ] - }, - { - "cell_type": "markdown", - "id": "c1f5cb55", - "metadata": {}, - "source": [ - "## Looking at Summary Data\n", - "\n", - "\n", - " \n", - "Now that Julia has a query, she can use it to look for data in any of the CDA endpoints. She starts by getting an overall summary of what data is available using `count`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "355b1706", - "metadata": {}, - "outputs": [], - "source": [ - "ALLDATA.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "b7ce25fc", - "metadata": {}, - "source": [ - "\n", - " \n", - "It seems there's a lot of data that might work for Julias study! Since she is interested in the beginings of cancer, she decides to start by looking at the researchsubject information, since that is where most of the diagnosis information is. She again gets a summary using `count`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55b0cdeb", - "metadata": {}, - "outputs": [], - "source": [ - "ALLDATA.researchsubject.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "86a323e2", - "metadata": {}, - "source": [ - "## Refining Queries\n", - "\n", - "\n", - " \n", - "Browsing the primary_diagnosis_condition data, Julia notices that there are a large number of research subjects that are Adenomas and Adenocarcinomas. Since Julia wants to look for common phenotypes in early cancers, she decides it might be easier to exclude the endocrine related data, as they might have different mechanisms. So she adds a new filter to her query:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d526198", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "Noadeno = Q('ResearchSubject.primary_diagnosis_condition != \"Adenomas and Adenocarcinomas\"')\n", - "\n", - "NoAdenoData = ALLDATA.AND(Noadeno)\n", - "\n", - "NoAdenoData.researchsubject.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "40a0191d", - "metadata": {}, - "source": [ - "\n", - " \n", - "She then previews the actual metadata for researchsubject, subject, and file, to make sure that they have all the information she will need for her work. Since she's mostly interested in looking at the kinds of data available from each endpoint:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d186b837", - "metadata": {}, - "outputs": [], - "source": [ - "NoAdenoData.researchsubject.run().to_dataframe() # view the dataframe" - ] - }, - { - "cell_type": "markdown", - "id": "086697b3", - "metadata": {}, - "source": [ - "---\n", - "\n", - "
\n", - "\n", - "

ResearchSubject Field Definitions

\n", - "\n", - "A research subject is the entity of interest in a specific research study or project, typically a human being or an animal, but can also be a device, group of humans or animals, or a tissue sample. Human research subjects are usually not traceable to a particular person to protect the subjects privacy. This entity plays the role of the case_id in existing data. An individual who participates in 3 studies will have 3 researchsubject IDs\n", - " \n", - "
    \n", - "
  • id: The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system. For CDA, this is case_id.
  • \n", - "
  • identifier: A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity.
  • \n", - "
  • identifier.system: The system or namespace that defines the identifier.
  • \n", - "
  • identifier.value: The value of the identifier, as defined by the system.
  • \n", - "
  • member_of_research_project: A reference to the Study(s) of which this ResearchSubject is a member.
  • \n", - "
  • primary_diagnosis_condition: The text term used to describe the type of malignant disease, as categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This attribute represents the disease that qualified the subject for inclusion on the ResearchProject.
  • \n", - "
  • primary_diagnosis_site: The text term used to describe the primary site of disease, as categorized by the World Health Organization's (WHO) International Classification of Diseases for Oncology (ICD-O). This categorization groups cases into general categories. This attribute represents the primary site of disease that qualified the subject for inclusion on the ResearchProject.
  • \n", - "
  • subject_id: The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system. Can be joined to the `id` field from subject results
  • \n", - "
\n", - "\n", - "
\n", - " \n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d0f5e2f", - "metadata": {}, - "outputs": [], - "source": [ - "NoAdenoData.subject.run().to_dataframe() # view the dataframe" - ] - }, - { - "cell_type": "markdown", - "id": "dec76132", - "metadata": {}, - "source": [ - "---\n", - "\n", - "
\n", - "\n", - "

Subject Field Definitions

\n", - "\n", - "A patient entity captures the study-independent metadata for research subjects. Human research subjects are usually not traceable to a particular person to protect the subjects privacy.\n", - "\n", - " \n", - "
    \n", - "
  • id: The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.\",STRING
  • \n", - "
  • identifier: A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity.
  • \n", - "
  • identifier.system: The system or namespace that defines the identifier.
  • \n", - "
  • identifier.value: The value of the identifier, as defined by the system.
  • \n", - "
  • species: The taxonomic group (e.g. species) of the patient. For MVP, since taxonomy vocabulary is consistent between GDC and PDC, using text. Ultimately, this will be a term returned by the vocabulary service.
  • \n", - "
  • sex: The biologic character or quality that distinguishes male and female from one another as expressed by analysis of the person's gonadal, morphologic (internal and external), chromosomal, and hormonal characteristics.
  • \n", - "
  • race: An arbitrary classification of a taxonomic group that is a division of a species. It usually arises as a consequence of geographical isolation within a species and is characterized by shared heredity, physical attributes and behavior, and in the case of humans, by common history, nationality, or geographic distribution. The provided values are based on the categories defined by the U.S. Office of Management and Business and used by the U.S. Census Bureau.
  • \n", - "
  • ethnicity: An individual's self-described social and cultural grouping, specifically whether an individual describes themselves as Hispanic or Latino. The provided values are based on the categories defined by the U.S. Office of Management and Business and used by the U.S. Census Bureau.
  • \n", - "
  • days_to_birth: Number of days between the date used for index and the date from a person's date of birth represented as a calculated negative number of days.
  • \n", - "
  • subject_associated_project: The list of Projects associated with the Subject.
  • \n", - "
  • vital_status: Coded value indicating the state or condition of being living or deceased; also includes the case where the vital status is unknown.
  • \n", - "
  • days_to_death: Number of days between the date used for index and the date from a person's date of death represented as a calculated number of days.
  • \n", - "
  • cause_of_death: Coded value indicating the circumstance or condition that results in the death of the subject.
  • \n", - "
\n", - "\n", - "
\n", - " \n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04e04136", - "metadata": {}, - "outputs": [], - "source": [ - "NoAdenoData.file.run().to_dataframe() # view the dataframe" - ] - }, - { - "cell_type": "markdown", - "id": "8cf9f2d3", - "metadata": {}, - "source": [ - "\n", - "---\n", - "\n", - "
\n", - "\n", - "

File Field Definitions

\n", - "\n", - "A file is an information-bearing electronic object that contains a physical embodiment of some information using a particular character encoding.\n", - "\n", - " \n", - "
    \n", - "
  • id: The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system.
  • \n", - "
  • identifier: A 'business' identifier for the entity, typically as provided by an external system or authority, that persists across implementing systems (i.e. a 'logical' identifier). Uses a specialized, complex 'Identifier' data type to capture information about the source of the business identifier - or a URI expressed as a string to an existing entity.
  • \n", - "
  • identifier.system: The system or namespace that defines the identifier.
  • \n", - "
  • identifier.value: The value of the identifier, as defined by the system.
  • \n", - "
  • label: Short name or abbreviation for dataset. Maps to rdfs:label.
  • \n", - "
  • data_catagory: Broad categorization of the contents of the data file.
  • \n", - "
  • data_type: Specific content type of the data file.
  • \n", - "
  • file_format: Format of the data files.
  • \n", - "
  • associated_project: A reference to the Project(s) of which this ResearchSubject is a member. The associated_project may be embedded using the ref definition or may be a reference to the id for the Project - or a URI expressed as a string to an existing entity.
  • \n", - "
  • drs_uri: A string of characters used to identify a resource on the Data Repo Service(DRS). Can be used to retreive this specific file from a server.
  • \n", - "
  • byte_size: Size of the file in bytes. Maps to dcat:byteSize.
  • \n", - "
  • checksum: The md5 value for the file. A digit representing the sum of the correct digits in a piece of stored or transmitted digital data, against which later comparisons can be made to detect errors in the data.
  • \n", - "
  • data_modality: Data modality describes the biological nature of the information gathered as the result of an Activity, independent of the technology or methods used to produce the information. Always one of \"Genomic\", \"Proteomic\", or \"Imaging\".
  • \n", - "
  • imaging_modality: An imaging modality describes the imaging equipment and/or method used to acquire certain structural or functional information about the body. These include but are not limited to computed tomography (CT) and magnetic resonance imaging (MRI). Taken from the DICOM standard.
  • \n", - "
  • dbgap_accession_number: The dbgap accession number for the project.
  • \n", - "
\n", - "\n", - "
\n", - " \n", - "---\n" - ] - }, - { - "cell_type": "markdown", - "id": "ba6aadbe", - "metadata": {}, - "source": [ - "## Working with Results (pagination)\n", - "\n", - "\n", - " \n", - "Finally, Julia wants to save these results to use for the future. Since the preview dataframes only show the first 100 results of each search, she uses the `paginator` function to get all the data from the subject and researchsubject endpoints into their own dataframes:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c2cec2bc", - "metadata": {}, - "outputs": [], - "source": [ - "researchsubs = NoAdenoData.researchsubject.run()\n", - "rsdf = pd.DataFrame()\n", - "for i in researchsubs.paginator(to_df=True):\n", - " rsdf = pd.concat([rsdf, i])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a1258057", - "metadata": {}, - "outputs": [], - "source": [ - "subs = NoAdenoData.subject.run()\n", - "subsdf = pd.DataFrame()\n", - "for i in subs.paginator(to_df=True):\n", - " subsdf = pd.concat([subsdf, i])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04cd73df", - "metadata": {}, - "outputs": [], - "source": [ - "rsdf # view the researchsubject dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92a6f811", - "metadata": {}, - "outputs": [], - "source": [ - "subsdf # view the subject dataframe" - ] - }, - { - "cell_type": "markdown", - "id": "75bcbe86", - "metadata": {}, - "source": [ - "## Merging Results across Endpoints\n", - "\n", - "\n", - " \n", - "Then Julia uses the `id` fields in each result to merge them together into one big dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b7a3383", - "metadata": {}, - "outputs": [], - "source": [ - "allmetadata = pd.merge(rsdf,\n", - " subsdf,\n", - " left_on=\"subject_id\",\n", - " right_on='id')\n", - "\n", - "allmetadata" - ] - }, - { - "cell_type": "markdown", - "id": "024da831", - "metadata": {}, - "source": [ - "\n", - " \n", - "And saves it out to a csv so she can browse it with Excel:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6628de4", - "metadata": {}, - "outputs": [], - "source": [ - "allmetadata.to_csv(\"allmetadata.csv\")" - ] - }, - { - "cell_type": "markdown", - "id": "246644d3", - "metadata": {}, - "source": [ - "\n", - " \n", - "Julia knows from her subject count summary that there are more than 200,000 files associated with her subjects, which is likely far more than she needs. To help her decide what files she wants, Julia uses endpoint chaining to get summary information about the files that are assigned to researchsubjects for her search criteria:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae1ae079", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "NoAdenoData.researchsubject.file.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "111f47c0", - "metadata": {}, - "source": [ - "\n", - " \n", - "Julia decides that a good place to start would be with Slide Images. There's only 1111, so she should be able to quickly scan through them over the next few days and see if they will be useful. So she adds one more filter on her search:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4a170b3", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "JustSlides = Q('file.data_type = \"Slide Image\"')\n", - "NoadenoJustSlides = NoAdenoData.AND(JustSlides)\n", - "NoadenoJustSlides.researchsubject.file.count.run()" - ] - }, - { - "cell_type": "markdown", - "id": "0e385faf", - "metadata": {}, - "source": [ - "\n", - " \n", - "Finally, Julia uses the pagenation function again to get all the slide files, and merges her metadata file with this file information. This way she will be able to review what phenotypes each slide is associated with:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f45f6503", - "metadata": {}, - "outputs": [], - "source": [ - "slides = NoadenoJustSlides.researchsubject.file.run()\n", - "slidesdf = pd.DataFrame()\n", - "for i in slides.paginator(to_df=True):\n", - " slidesdf = pd.concat([slidesdf, i])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d1f7a21", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "slidemetadata = pd.merge(slidesdf, \n", - " allmetadata, \n", - " on=\"subject_id\")\n", - "slidemetadata" - ] - }, - { - "cell_type": "markdown", - "id": "3abb0f32", - "metadata": {}, - "source": [ - "\n", - " \n", - "She saves this file out as well." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ef6e0fb", - "metadata": {}, - "outputs": [], - "source": [ - "slidemetadata.to_csv(\"slidemetadata.csv\")" - ] - }, - { - "cell_type": "markdown", - "id": "369a3068", - "metadata": {}, - "source": [ - "\n", - " \n", - "Now Julia has all the information she needs to begin work on her project. She can use the `drs_id` column information to directly download the images she is interested in using a DRS resolver, or she can input the DRS IDs at a cloud workspace such as [Terra](https://terra.bio/) or the [Cancer Genomics Cloud](https://www.cancergenomicscloud.org/) to view the images online. In either case, she has all the metadata she needs to get started, and can save this notebook of her work in case she'd like to come back and modify her search." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/Untitled.ipynb b/notebooks/Untitled.ipynb deleted file mode 100644 index e6099756..00000000 --- a/notebooks/Untitled.ipynb +++ /dev/null @@ -1,1371 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "a93a79c9-352f-41cb-9113-6daadaef1d99", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'2022.11.3'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cdapython import Q,columns,unique_terms\n", - "Q.get_version()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "525765be-da36-4349-8ca7-546fc95c15d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
https://cancerdata.dsde-dev.broadinstitute.org/\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[4;94mhttps://cancerdata.dsde-dev.broadinstitute.org/\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
broad-dsde-dev.cda_dev\n",
-       "
\n" - ], - "text/plain": [ - "broad-dsde-dev.cda_dev\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "Q.set_default_project_dataset(\"broad-dsde-dev.cda_dev\")\n", - "Q.set_host_url(\"https://cancerdata.dsde-dev.broadinstitute.org/\")\n", - "\n", - "print(Q.get_host_url())\n", - "print(Q.get_default_project_dataset())" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "051e1ff6-e493-4a39-81ad-4459fef8a980", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 8.493 sec 8493 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m8.493\u001b[0m sec \u001b[1;36m8493\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9110309fa3c143f5951a300bf29719f4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 0\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m0\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 1000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 2000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m2000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 3000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m3000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 4000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m4000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 5000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m5000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 6000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m6000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 7000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m7000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 8000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m8000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 9000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m9000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 10000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m10000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 11000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m11000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 13000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m13000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 14000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m14000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 15000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m15000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 16000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m16000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 17000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m17000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 18000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m18000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 19000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m19000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 20000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m20000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 21000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m21000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 22000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m22000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 23000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m23000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 24000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m24000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 25000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m25000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 26000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m26000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 27000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m27000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 28000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m28000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 29000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m29000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 30000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m30000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 31000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m31000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 32000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m32000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 33000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m33000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 34000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m34000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 35000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m35000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 36000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m36000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 37000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m37000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 38000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m38000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 39000\n",
-       "            Count: 1000\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: True\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m39000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m1000\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;92;40mTrue\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "            \n",
-       "            Offset: 40000\n",
-       "            Count: 252\n",
-       "            Total Row Count: 40252\n",
-       "            More pages: False\n",
-       "            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[37;40m \u001b[0m\n", - "\u001b[37;40m Offset: \u001b[0m\u001b[1;36;40m40000\u001b[0m\n", - "\u001b[37;40m Count: \u001b[0m\u001b[1;36;40m252\u001b[0m\n", - "\u001b[37;40m Total Row Count: \u001b[0m\u001b[1;36;40m40252\u001b[0m\n", - "\u001b[37;40m More pages: \u001b[0m\u001b[3;91;40mFalse\u001b[0m\n", - "\u001b[37;40m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
40252\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m40252\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "n = Q(\"sex = 'male' AND sex != 'null'\").run(limit=1000)\n", - "box = []\n", - "for i in n.paginator():\n", - " print(i, style=\"white on black\")\n", - " box.extend(i)\n", - "\n", - "print(len(box))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "75d36ce1-b772-4120-a695-68eb39f6e3dd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                    subject_id  \\\n",
-       "0                          103   \n",
-       "1                      11CO042   \n",
-       "2                      11CO057   \n",
-       "3                          121   \n",
-       "4                         2201   \n",
-       "...                        ...   \n",
-       "40247             TCGA-YL-A8S8   \n",
-       "40248             TCGA-YL-A8SL   \n",
-       "40249  UTRI_SUBJECT_001_000086   \n",
-       "40250  UTRI_SUBJECT_001_000241   \n",
-       "40251  UTRI_SUBJECT_001_000323   \n",
-       "\n",
-       "                                      subject_identifier       species   sex  \\\n",
-       "0                    [{'system': 'PDC', 'value': '103'}]  homo sapiens  male   \n",
-       "1      [{'system': 'GDC', 'value': '11CO042'}, {'syst...  homo sapiens  male   \n",
-       "2      [{'system': 'GDC', 'value': '11CO057'}, {'syst...  homo sapiens  male   \n",
-       "3                    [{'system': 'PDC', 'value': '121'}]  homo sapiens  male   \n",
-       "4                   [{'system': 'GDC', 'value': '2201'}]  homo sapiens  male   \n",
-       "...                                                  ...           ...   ...   \n",
-       "40247  [{'system': 'GDC', 'value': 'TCGA-YL-A8S8'}, {...  homo sapiens  male   \n",
-       "40248  [{'system': 'GDC', 'value': 'TCGA-YL-A8SL'}, {...  homo sapiens  male   \n",
-       "40249  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "40250  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "40251  [{'system': 'GDC', 'value': 'UTRI_SUBJECT_001_...  homo sapiens  male   \n",
-       "\n",
-       "               race               ethnicity  days_to_birth  \\\n",
-       "0           chinese            not reported            NaN   \n",
-       "1             white  not hispanic or latino            NaN   \n",
-       "2             white  not hispanic or latino            NaN   \n",
-       "3           chinese            not reported            NaN   \n",
-       "4             white                 Unknown            NaN   \n",
-       "...             ...                     ...            ...   \n",
-       "40247         white  not hispanic or latino            NaN   \n",
-       "40248         white  not hispanic or latino            NaN   \n",
-       "40249  not reported            not reported            NaN   \n",
-       "40250  not reported            not reported            NaN   \n",
-       "40251  not reported            not reported            NaN   \n",
-       "\n",
-       "                              subject_associated_project  vital_status  \\\n",
-       "0      [Integrated Proteogenomic Characterization of ...          Dead   \n",
-       "1                                  [CPTAC-2, cptac_coad]  Not Reported   \n",
-       "2                                  [CPTAC-2, cptac_coad]  Not Reported   \n",
-       "3      [Integrated Proteogenomic Characterization of ...         Alive   \n",
-       "4                                    [BEATAML1.0-COHORT]         Alive   \n",
-       "...                                                  ...           ...   \n",
-       "40247                             [tcga_prad, TCGA-PRAD]         Alive   \n",
-       "40248                             [tcga_prad, TCGA-PRAD]         Alive   \n",
-       "40249                                         [TRIO-CRU]  Not Reported   \n",
-       "40250                                         [TRIO-CRU]  Not Reported   \n",
-       "40251                                         [TRIO-CRU]  Not Reported   \n",
-       "\n",
-       "       days_to_death cause_of_death  \n",
-       "0                NaN     Metastasis  \n",
-       "1                NaN           None  \n",
-       "2                NaN           None  \n",
-       "3                NaN           None  \n",
-       "4                NaN           None  \n",
-       "...              ...            ...  \n",
-       "40247            NaN           None  \n",
-       "40248            NaN           None  \n",
-       "40249            NaN           None  \n",
-       "40250            NaN           None  \n",
-       "40251            NaN           None  \n",
-       "\n",
-       "[40252 rows x 11 columns]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[37;44m subject_id \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m103\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m 11CO042 \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m 11CO057 \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m121\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;36;44m2201\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m TCGA-YL-A8S8 \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m TCGA-YL-A8SL \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000086 \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000241 \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m UTRI_SUBJECT_001_000323 \u001b[0m\n", - "\n", - "\u001b[37;44m subject_identifier species sex \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'PDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'103'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'11CO042'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[37;44m'syst\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'11CO057'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[37;44m'syst\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'PDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'121'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'2201'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-YL-A8S8'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'TCGA-YL-A8SL'\u001b[0m\u001b[1;37;44m}\u001b[0m\u001b[37;44m, \u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[1;37;44m{\u001b[0m\u001b[32;44m'system'\u001b[0m\u001b[37;44m: \u001b[0m\u001b[32;44m'GDC'\u001b[0m\u001b[37;44m, \u001b[0m\u001b[32;44m'value'\u001b[0m\u001b[37;44m: 'UTRI_SUBJECT_001_\u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m homo sapiens male \u001b[0m\n", - "\n", - "\u001b[37;44m race ethnicity days_to_birth \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m chinese not reported NaN \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m chinese not reported NaN \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m white Unknown NaN \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m white not hispanic or latino NaN \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m not reported not reported NaN \u001b[0m\n", - "\n", - "\u001b[37;44m subject_associated_project vital_status \\\u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mIntegrated Proteogenomic Characterization of \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m Dead \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mCPTAC-\u001b[0m\u001b[1;36;44m2\u001b[0m\u001b[37;44m, cptac_coad\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mCPTAC-\u001b[0m\u001b[1;36;44m2\u001b[0m\u001b[37;44m, cptac_coad\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mIntegrated Proteogenomic Characterization of \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m Alive \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mBEATAML1.\u001b[0m\u001b[1;36;44m0\u001b[0m\u001b[37;44m-COHORT\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mtcga_prad, TCGA-PRAD\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mtcga_prad, TCGA-PRAD\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Alive \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m \u001b[0m\u001b[1;37;44m[\u001b[0m\u001b[37;44mTRIO-CRU\u001b[0m\u001b[1;37;44m]\u001b[0m\u001b[37;44m Not Reported \u001b[0m\n", - "\n", - "\u001b[37;44m days_to_death cause_of_death \u001b[0m\n", - "\u001b[1;36;44m0\u001b[0m\u001b[37;44m NaN Metastasis \u001b[0m\n", - "\u001b[1;36;44m1\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m2\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m3\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m4\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\u001b[33;44m...\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40247\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40248\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40249\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40250\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\u001b[1;36;44m40251\u001b[0m\u001b[37;44m NaN \u001b[0m\u001b[3;35;44mNone\u001b[0m\u001b[37;44m \u001b[0m\n", - "\n", - "\u001b[1;37;44m[\u001b[0m\u001b[1;36;44m40252\u001b[0m\u001b[37;44m rows x \u001b[0m\u001b[1;36;44m11\u001b[0m\u001b[37;44m columns\u001b[0m\u001b[1;37;44m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import pandas as pd\n", - "df = pd.DataFrame(box)\n", - "print(df, style=\"white on blue\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "284ab4e3-8c39-452c-bcae-2a6d08b9b29e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/Untitled1.ipynb b/notebooks/Untitled1.ipynb deleted file mode 100644 index 37a153c8..00000000 --- a/notebooks/Untitled1.ipynb +++ /dev/null @@ -1,411 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "b974248c-48b3-4233-8211-4677aa63d377", - "metadata": {}, - "outputs": [], - "source": [ - "from cdapython import Q,unique_terms" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1764bcf2-8ecf-45c8-b617-fe239ceb7b95", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
https://cancerdata.dsde-dev.broadinstitute.org/\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[4;94mhttps://cancerdata.dsde-dev.broadinstitute.org/\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
broad-dsde-dev.cda_dev\n",
-       "
\n" - ], - "text/plain": [ - "broad-dsde-dev.cda_dev\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "Q.set_default_project_dataset(\"broad-dsde-dev.cda_dev\")\n", - "Q.set_host_url(\"https://cancerdata.dsde-dev.broadinstitute.org/\")\n", - "\n", - "print(Q.get_host_url())\n", - "print(Q.get_default_project_dataset())" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "81058c54-afe6-4592-b5e7-46c61e646ff1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Total execution time: 7374 ms 7.374 sec 0 min\n",
-       "
\n" - ], - "text/plain": [ - "Total execution time: \u001b[1;36m7374\u001b[0m ms \u001b[1;36m7.374\u001b[0m sec \u001b[1;36m0\u001b[0m min\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\n", - " Query:SELECT results.* EXCEPT(rn) FROM (SELECT ROW_NUMBER() OVER (PARTITION BY all_Files_v3_0_final.id) as rn, all_Subjects_v3_0_final.id AS id FROM broad-dsde-dev.cda_dev.all_Subjects_v3_0_final AS all_Subjects_v3_0_final INNER JOIN UNNEST(all_Subjects_v3_0_final.ResearchSubject) AS _ResearchSubject INNER JOIN UNNEST(_ResearchSubject.Specimen) AS _ResearchSubject_Specimen INNER JOIN UNNEST(_ResearchSubject_Specimen.Files) AS _ResearchSubject_Specimen_Files INNER JOIN broad-dsde-dev.cda_dev.all_Files_v3_0_final AS all_Files_v3_0_final ON all_Files_v3_0_final.id = _ResearchSubject_Specimen_Files WHERE ((IFNULL(UPPER(_ResearchSubject_Specimen.specimen_type), '') = UPPER('slide')) OR (IFNULL(UPPER(all_Files_v3_0_final.data_type), '') = UPPER('Slide Image')))) as results WHERE rn = 1\n", - " Offset: 0\n", - " Count: 100\n", - " Total Row Count: 496454\n", - " More pages: True\n", - " " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mylist = Q('ResearchSubject.Specimen.specimen_type= \"slide\" OR file.data_type = \"Slide Image\"').specimen.file.run(filter=\"id\"\n", - ", show_sql=True)\n", - "mylist" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "658895d4-6a85-4224-a610-3cdd53b450d3", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4a5327089fdc40699a88fd043a569a01", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "p = mylist.auto_paginator(to_df=True,limit=20000)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b1b65a54-92aa-4d76-ab06-d9d8dd18ce33", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 496454 entries, 0 to 99\n", - "Data columns (total 1 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 496454 non-null object\n", - "dtypes: object(1)\n", - "memory usage: 7.6+ MB\n" - ] - } - ], - "source": [ - "p.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5182c3c-1338-445e-922c-40080f784254", - "metadata": {}, - "outputs": [], - "source": [ - "localhost = \"http://localhost:8080\"\n", - "d = unique_terms(\n", - " \"species\", host=localhost, table=\"gdc-bq-sample.dev\", show_sql=True\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "20d0de23-1a78-4bdc-bd28-0af00b6b44c0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
speciesCount
0canis familiaris57
1home sapiens1
2homo sapiens138023
3homo sapiens; mus musculus41
4internal reference - pooled sample1
5jhu qc1
6mus musculus183
7normal only ir1
8not reported19
9pnnl-jhu ref1
10ref1
11taiwanese ir1
12tumor only ir1
\n", - "
" - ], - "text/plain": [ - " species Count\n", - "0 canis familiaris 57\n", - "1 home sapiens 1\n", - "2 homo sapiens 138023\n", - "3 homo sapiens; mus musculus 41\n", - "4 internal reference - pooled sample 1\n", - "5 jhu qc 1\n", - "6 mus musculus 183\n", - "7 normal only ir 1\n", - "8 not reported 19\n", - "9 pnnl-jhu ref 1\n", - "10 ref 1\n", - "11 taiwanese ir 1\n", - "12 tumor only ir 1" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81f0a38a-3cdf-4330-82bd-850cf2c094d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Waiting for results\n",
-       "
\n" - ], - "text/plain": [ - "Waiting for results\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "d = Q('File.associated_project = \"%cptac%\"').file.run(limit=2000,async_call=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a081b6a-1f56-44a8-98de-7fc322092cbc", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.13" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/example-tester.ipynb b/notebooks/example-tester.ipynb deleted file mode 100644 index bca7d28d..00000000 --- a/notebooks/example-tester.ipynb +++ /dev/null @@ -1,1120 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "51e04dd6", - "metadata": {}, - "source": [ - "# CDA Python: Features & Examples\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "6c459428", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b113462f9487430e9d87a8b89b452359", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Dropdown(description='Tester:', options=(True, False), value=True)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from ipywidgets import Dropdown\n", - "from IPython.display import display\n", - "tester_check = Dropdown(\n", - " options=[True,False],\n", - " description='Tester:',\n", - " value=True,\n", - ")\n", - "display(tester_check)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2255e2ad", - "metadata": {}, - "source": [ - "The following examples illustrate some ```CDA Python``` features while providing insights into the underlying data structure (**Getting started**). To demonstrate those features, we provide a few relevant text queries along with step-by-step explanations on how to translate those into the ```CDA Python``` queries (**Example queries**). Finally, there are a few additional queries intended for the test users to play around with and send feedback to the CDA team (**Test queries**)." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f078fc5f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
/opt/conda/lib/python3.11/site-packages/cdapython/__init__.py\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[35m/opt/conda/lib/python3.11/site-packages/cdapython/\u001b[0m\u001b[95m__init__.py\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
2023.5.4\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36m2023.5\u001b[0m.\u001b[1;36m4\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from cdapython import Q, columns, unique_terms\n", - "import cdapython,pandas as pd\n", - "print(cdapython.__file__)\n", - "print(cdapython.__version__)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "ecdad705", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                            Total execution time: 0\n",
-       "                            min 8.817 sec 8817 ms\n",
-       "                            \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m8.817\u001b[0m sec \u001b[1;36m8817\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "a = Q(\"sex = 'male'\").run()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "01f58251", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
subject_idsubject_identifierspeciessexraceethnicitydays_to_birthsubject_associated_projectvital_statusdays_to_deathcause_of_death
0BEATAML1.0.1039[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalenot reportednot reportedNaN[BEATAML1.0-CRENOLANIB]DeadNaNNone
1BEATAML1.0.2050[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmaleasiannot hispanic or latinoNaN[BEATAML1.0-COHORT]UnknownNaNNone
2BEATAML1.0.2116[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmaleUnknownnot hispanic or latinoNaN[BEATAML1.0-COHORT]DeadNaNNone
3BEATAML1.0.2285[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmaleUnknownUnknownNaN[BEATAML1.0-COHORT]AliveNaNNone
4BEATAML1.0.2295[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latinoNaN[BEATAML1.0-COHORT]DeadNaNNone
....................................
95FM.AD9848[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalenot reportednot reportedNaN[FM-AD]Not ReportedNaNNone
96GENIE.GENIE-DFCI-000475[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latino-21549.0[GENIE-DFCI]Not ReportedNaNNone
97GENIE.GENIE-DFCI-001317[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latino-26298.0[GENIE-DFCI]Not ReportedNaNNone
98GENIE.GENIE-DFCI-001483[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latino-22280.0[GENIE-DFCI]Not ReportedNaNNone
99GENIE.GENIE-DFCI-001844[{'system': 'GDC', 'field_name': 'case.submitt...Homo sapiensmalewhitenot hispanic or latino-21915.0[GENIE-DFCI]Not ReportedNaNNone
\n", - "

100 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " subject_id \\\n", - "0 BEATAML1.0.1039 \n", - "1 BEATAML1.0.2050 \n", - "2 BEATAML1.0.2116 \n", - "3 BEATAML1.0.2285 \n", - "4 BEATAML1.0.2295 \n", - ".. ... \n", - "95 FM.AD9848 \n", - "96 GENIE.GENIE-DFCI-000475 \n", - "97 GENIE.GENIE-DFCI-001317 \n", - "98 GENIE.GENIE-DFCI-001483 \n", - "99 GENIE.GENIE-DFCI-001844 \n", - "\n", - " subject_identifier species sex \\\n", - "0 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "1 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "2 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "3 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "4 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - ".. ... ... ... \n", - "95 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "96 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "97 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "98 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "99 [{'system': 'GDC', 'field_name': 'case.submitt... Homo sapiens male \n", - "\n", - " race ethnicity days_to_birth \\\n", - "0 not reported not reported NaN \n", - "1 asian not hispanic or latino NaN \n", - "2 Unknown not hispanic or latino NaN \n", - "3 Unknown Unknown NaN \n", - "4 white not hispanic or latino NaN \n", - ".. ... ... ... \n", - "95 not reported not reported NaN \n", - "96 white not hispanic or latino -21549.0 \n", - "97 white not hispanic or latino -26298.0 \n", - "98 white not hispanic or latino -22280.0 \n", - "99 white not hispanic or latino -21915.0 \n", - "\n", - " subject_associated_project vital_status days_to_death cause_of_death \n", - "0 [BEATAML1.0-CRENOLANIB] Dead NaN None \n", - "1 [BEATAML1.0-COHORT] Unknown NaN None \n", - "2 [BEATAML1.0-COHORT] Dead NaN None \n", - "3 [BEATAML1.0-COHORT] Alive NaN None \n", - "4 [BEATAML1.0-COHORT] Dead NaN None \n", - ".. ... ... ... ... \n", - "95 [FM-AD] Not Reported NaN None \n", - "96 [GENIE-DFCI] Not Reported NaN None \n", - "97 [GENIE-DFCI] Not Reported NaN None \n", - "98 [GENIE-DFCI] Not Reported NaN None \n", - "99 [GENIE-DFCI] Not Reported NaN None \n", - "\n", - "[100 rows x 11 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a.to_dataframe()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f759a7bb", - "metadata": {}, - "source": [ - "## Getting started" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "040aa164", - "metadata": {}, - "source": [ - "Print out the list of available fields with ```columns()```:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1a1b8cbd", - "metadata": {}, - "outputs": [], - "source": [ - "columns()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "11a24e81", - "metadata": {}, - "source": [ - "All of the above fields are what describes the highest entity in the data structure hierarchy – ```Patient``` entity. The first five fields represent ```Patient``` demographic information, while the ```ResearchSubject``` entity contains details that we are used to seeing within the nodes' ```Case``` record.\n", - "\n", - "One of the contributions of the CDA is aggregated ```ResearchSubject``` information. This means that all ```ResearchSubject``` records coming from the same subject are now gathered under the Patient entity. As we know, certain specimens are studied in multiple projects (being part of a single data node or multiple nodes) as different ```ResearchSubject``` entries. Those ```ResearchSubject``` entries are collected as a list under the ```ResearchSubject``` entity. One example of this is the patient record with ```id = TCGA-E2-A10A``` which contains two ```ResearchSubject``` entries, one from GDC and the other from PDC.\n", - "\n", - "Note that the ```ResearchSubject``` entity is a list of records, as many other entities above are. **There are certain considerations that should be made when creating the queries by using the fields that come from lists, but more about that will follow in examples below**.\n", - "\n", - "The names in the list may look familiar to you, but they may have been renamed or restructured in the CDA. The field name mappings are described in the _CDA Schema Field Mapping_ document that is linked in the _Testing Guide_. A more direct way to explore and understand the fields is to use the ```unique_terms()``` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f1b8ae8", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.Specimen.source_material_type\",limit=10)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "06c24c59", - "metadata": {}, - "source": [ - "Additionally, you can specify a particular data node by using the ```system``` argument:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2010208d", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.Specimen.source_material_type\", system=\"PDC\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b99dff03", - "metadata": {}, - "source": [ - "Now, let's dive into the querying!\n", - "\n", - "We can start by getting the record for ```id = TCGA-E2-A10A``` that we mentioned earlier:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33e967c1", - "metadata": {}, - "outputs": [], - "source": [ - "q = Q('id = \"TCGA-E2-A10A\"') # note the double quotes for the string value\n", - "\n", - "r = q.counts(host=\"http://35.192.60.10:8080/\")\n", - "\n", - "\n", - "print(r)\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c8cedfd", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "61e51afd", - "metadata": {}, - "source": [ - "We see that we've got a single patient record as a result, which is what we expect.\n", - "\n", - "Let's see how the result looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b57f607a", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "r[0]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7ab4a726", - "metadata": {}, - "source": [ - "The record is pretty large, so we'll print out ```identifier``` values for each ```ResearchSubject``` to confirm that we have one ```ResearchSubject``` that comes from GDC, and one that comes from PDC:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "016d24bf", - "metadata": {}, - "outputs": [], - "source": [ - "for research_subject in r[0]['ResearchSubject']:\n", - " print(research_subject['identifier'])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "5a80105b", - "metadata": {}, - "source": [ - "The values represent ```ResearchSubject``` IDs and are equivalent to ```case_id``` values in data nodes." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "438717f3", - "metadata": {}, - "source": [ - "## Example queries" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "052970b9", - "metadata": {}, - "source": [ - "Now that we can create a query with ```Q()``` function, let's see how we can combine multiple conditions.\n", - "\n", - "There are three operators available:\n", - "* ```And()```\n", - "* ```Or()```\n", - "* ```From()```\n", - "\n", - "The following examples show how those operators work in practice." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2ebb5e06", - "metadata": {}, - "source": [ - "### Query 1\n", - "\n", - "**Find data for subjects who were diagnosed after the age of 50 and who were investigated as part of the TCGA-OV project.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43d709a5", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Diagnosis.age_at_diagnosis > 50*365')\n", - "q2 = Q('ResearchSubject.associated_project = \"TCGA-OV\"')\n", - "\n", - "q = q1.AND(q2)\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c1dd55f6", - "metadata": {}, - "source": [ - "### Query 2\n", - "\n", - "**Find data for donors with melanoma (Nevi and Melanomas) diagnosis and who were diagnosed before the age of 30.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "521d0088", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "\n", - "q = q1.AND(q2)\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d126823c", - "metadata": {}, - "source": [ - "In addition, we can check how many records come from particular systems by adding one more condition to the query:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25d9258a", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "q3 = Q('ResearchSubject.Specimen.identifier.system = \"GDC\"')\n", - "\n", - "q = q1.AND(q2.AND(q3))\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c06e9a8d", - "metadata": {}, - "source": [ - "By comparing the ```Count``` value of the two results we can see that all the patients returned in the initial query are coming from the GDC.\n", - "\n", - "To explore the results further, we can fetch the patient JSON objects by iterating through the results:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4cb20150", - "metadata": {}, - "outputs": [], - "source": [ - "projects = set()\n", - "\n", - "for patient in r:\n", - " research_subjects = patient['ResearchSubject']\n", - " for rs in research_subjects:\n", - " projects.add(rs['associated_project'])\n", - "\n", - "print(projects)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "6d8f8cc3", - "metadata": {}, - "source": [ - "The output shows the projects where _Nevi and Melanomas_ cases appear." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "12c650a3", - "metadata": {}, - "source": [ - "### Query 3\n", - "\n", - "**Identify all samples that meet the following conditions:**\n", - "\n", - "* **Sample is from primary tumor**\n", - "* **Disease is ovarian or breast cancer**\n", - "* **Subjects are females under the age of 60 years**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95920a42", - "metadata": {}, - "outputs": [], - "source": [ - "tumor_type = Q('ResearchSubject.Specimen.source_material_type = \"Primary Tumor\"')\n", - "disease1 = Q('ResearchSubject.primary_disease_site = \"Ovary\"')\n", - "disease2 = Q('ResearchSubject.primary_disease_site = \"Breast\"')\n", - "demographics1 = Q('sex = \"female\"')\n", - "demographics2 = Q('days_to_birth > -60*365') # note that days_to_birth is a negative value\n", - "\n", - "q1 = tumor_type.AND(demographics1.AND(demographics2))\n", - "q2 = disease1.OR(disease2)\n", - "q = q1.AND(q2)\n", - "\n", - "r = q.run()\n", - "print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "713ac984", - "metadata": {}, - "source": [ - "In this case, we have a result that contains more than 1000 records which is the default page size. To load the next 1000 records, we can use the ```next_page()``` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "efdadbb9", - "metadata": {}, - "outputs": [], - "source": [ - "r2 = r.next_page()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1419fc7", - "metadata": {}, - "outputs": [], - "source": [ - "print(r2)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a960e07d", - "metadata": {}, - "source": [ - "Alternatively, we can use the ```offset``` argument to specify the record to start from:\n", - "\n", - "```\n", - "...\n", - "r = q.run(offset=1000)\n", - "print(r)\n", - "```" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "41db78e2", - "metadata": {}, - "source": [ - "### Query 4\n", - "\n", - "**Find data for donors with \"Ovarian Serous Cystadenocarcinoma\" with proteomic and genomic data.**" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b9171d93", - "metadata": {}, - "source": [ - "**Note that disease type value denoting the same disease groups can be completely different within different systems. This is where CDA features come into play.** We first start by exploring the values available for this particular field in both systems." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33d8294f", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms('ResearchSubject.primary_disease_type', system=\"GDC\",limit=10)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7495d1e4", - "metadata": {}, - "source": [ - "Since “Ovarian Serous Cystadenocarcinoma” doesn’t appear in GDC values we decide to look into the PDC:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aac21adf", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms('ResearchSubject.primary_disease_type', system=\"PDC\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a2ec177f", - "metadata": {}, - "source": [ - "After examining the output, we see that it does come from the PDC. Hence, if we could first identify the data that has research subjects found within the PDC that have this particular disease type, and then further narrow down the results to include only the portion of the data that is present in GDC, we could get the records that we are looking for." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "19a05c04", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "                        Total execution time: 0\n",
-       "                        min 3.927 sec 3927 ms\n",
-       "                        \n",
-       "
\n" - ], - "text/plain": [ - "\n", - " Total execution time: \u001b[1;36m0\u001b[0m\n", - " min \u001b[1;36m3.927\u001b[0m sec \u001b[1;36m3927\u001b[0m ms\n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\n", - " Query:SELECT results.* EXCEPT(rn) FROM (SELECT ROW_NUMBER() OVER (PARTITION BY Subject.id) as rn, Subject.id AS subject_id, Subject.identifier AS subject_identifier, Subject.species AS species, Subject.sex AS sex, Subject.race AS race, Subject.ethnicity AS ethnicity, Subject.days_to_birth AS days_to_birth, Subject.subject_associated_project AS subject_associated_project, Subject.vital_status AS vital_status, Subject.days_to_death AS days_to_death, Subject.cause_of_death AS cause_of_death FROM gdc-bq-sample.dev.all_Subjects_v3_0_final AS Subject LEFT JOIN UNNEST(Subject.identifier) AS _subject_identifier WHERE (IFNULL(UPPER(_subject_identifier.system), '') = UPPER('GDC'))) as results WHERE rn = 1\n", - " Offset: 0\n", - " Count: 100\n", - " Total Row Count: 85464\n", - " More pages: True\n", - " " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# q1 = Q('ResearchSubject.primary_disease_type = \"Ovarian Serous Cystadenocarcinoma\"')\n", - "# q2 = Q('ResearchSubject.identifier.system = \"PDC\"')\n", - "# q3 = Q('ResearchSubject.identifier.system = \"GDC\"')\n", - "\n", - "# q = q3.FROM(q1.AND(q2))\n", - "# r = q.run()\n", - "\n", - "# print(r)\n", - "\n", - "\n", - "from cdapython import Q\n", - "\n", - "\n", - "integration_host = \"http://35.192.60.10:8080/\"\n", - "integration_table = \"gdc-bq-sample.dev\"\n", - "Q('subject_identifier_system = \"GDC\" FROM subject_identifier_system = \"PDC\" FROM subject_identifier_system = \"IDC\"').run(host=integration_host,table=integration_table,show_sql=True)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b1d3d6a0", - "metadata": {}, - "source": [ - "As you can see, this is achieved by utilizing ```From``` operator. The ```From``` operator allows us to create queries from results of other queries. This is particularly useful when working with conditions that involve a single field which can take multiple different values for different items in a list that is being part of, e.g. we need ```ResearchSubject.identifier.system``` to be both “PDC” and “GDC” for a single patient. In such cases, ```And``` operator can’t help because it will return those entries where the field takes both values, which is zero entries." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5ec883f", - "metadata": {}, - "outputs": [], - "source": [ - "for i in Q.sql(\"SELECT * FROM `gdc-bq-sample.cda_mvp.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` WHERE table_name = 'v3' Limit 5\"):\n", - " print(i)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc992d2a", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.identifier.system = \"GDC\" FROM ResearchSubject.primary_disease_type = \"Ovarian Serous Cystadenocarcinoma\" AND ResearchSubject.identifier.system = \"PDC\"')\n", - "result = q1.run(async_call=True)\n", - "print(result)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "89b96ee0", - "metadata": {}, - "source": [ - "## Test queries" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3cdcf002", - "metadata": {}, - "source": [ - "Now that we've successfully run and analyzed a few queries, here are a few additional ones you can try out on your own.\n", - "\n", - "Solutions can be shared with the CDA team as indicated in the _Testing Guide_ document." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f5fee345", - "metadata": {}, - "source": [ - "### Test Query 1\n", - "\n", - "**Find data from TCGA-BRCA project, with donors over the age of 50 with Stage IIIC cancer.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92ccb890", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4fc36929", - "metadata": {}, - "source": [ - "### Test Query 2\n", - "\n", - "**Find data from all patients who have been treated with \"Radiation Therapy, NOS\" and have both genomic and proteomic data.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a987983", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "e11f76f7", - "metadata": {}, - "source": [ - "### Test Query 3\n", - "\n", - "**Find data from all subjects with lung adenocarcinomas that have both primary and recurrent tumors.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2cbcd5e", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a9045266-ecda-4692-b732-d4b8c660801a", - "metadata": {}, - "outputs": [], - "source": [ - "q = Q('id = \"TCGA-13-1409\"')\n", - "r = q.run(limit = 20, host=\"http://localhost:8080\")\n", - "# r.to_dataframe(\n", - "# record_path =['ResearchSubject','File'],\n", - "# meta=['id','species','sex','race','ethnicity'],\n", - "# meta_prefix= 'subject_', \n", - "# max_level = 0,\n", - "# errors='ignore'\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8a346016-6ab6-4e02-94ad-22bd8163bbb9", - "metadata": {}, - "outputs": [], - "source": [ - "r.to_dataframe(record_path=['ResearchSubject'],meta=['id','species','sex','race','ethnicity'],meta_prefix= 'subject_',)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5475f4cd-6ce2-4ce7-b3f7-a292e31c6e76", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "q = q1.AND(q2)\n", - "print(q)\n", - "r = q.run(host=\"http://localhost:8080\")\n", - "print(r)" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "metadata": { - "interpreter": { - "hash": "5c867d1980d5b66d2bfc8a5903dcee074b4b68f3917a4f27f8a310cab24e9f1b" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb deleted file mode 100644 index 86fe5588..00000000 --- a/notebooks/example.ipynb +++ /dev/null @@ -1,825 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "8ee54fa1", - "metadata": {}, - "source": [ - "# CDA Python: Features & Examples\n", - "---" - ] - }, - { - "cell_type": "markdown", - "id": "35217356", - "metadata": {}, - "source": [ - "The following examples illustrate some ```CDA Python``` features while providing insights into the underlying data structure (**Getting started**). To demonstrate those features, we provide a few relevant text queries along with step-by-step explanations on how to translate those into the ```CDA Python``` queries (**Example queries**). Finally, there are a few additional queries intended for the test users to play around with and send feedback to the CDA team (**Test queries**)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5245b7b0", - "metadata": {}, - "outputs": [], - "source": [ - "from cdapython import Q, columns, unique_terms,query\n", - "import cdapython\n", - "print(cdapython.__file__)\n", - "print(cdapython.__version__)" - ] - }, - { - "cell_type": "markdown", - "id": "bccff533", - "metadata": {}, - "source": [ - "## Getting started" - ] - }, - { - "cell_type": "markdown", - "id": "36d1d3ec", - "metadata": {}, - "source": [ - "Print out the list of available fields with ```columns()```:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b83e40e6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dboles/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/urllib3/connectionpool.py:1020: InsecureRequestWarning: Unverified HTTPS request is being made to host 'cda.cda-dev.broadinstitute.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n", - " InsecureRequestWarning,\n" - ] - }, - { - "data": { - "text/plain": [ - "['id',\n", - " 'identifier',\n", - " 'identifier.system',\n", - " 'identifier.value',\n", - " 'sex',\n", - " 'race',\n", - " 'ethnicity',\n", - " 'days_to_birth',\n", - " 'subject_associated_project',\n", - " 'File',\n", - " 'File.id',\n", - " 'File.identifier',\n", - " 'File.identifier.system',\n", - " 'File.identifier.value',\n", - " 'File.label',\n", - " 'File.data_category',\n", - " 'File.data_type',\n", - " 'File.file_format',\n", - " 'File.associated_project',\n", - " 'File.drs_uri',\n", - " 'File.byte_size',\n", - " 'File.checksum',\n", - " 'ResearchSubject',\n", - " 'ResearchSubject.id',\n", - " 'ResearchSubject.identifier',\n", - " 'ResearchSubject.identifier.system',\n", - " 'ResearchSubject.identifier.value',\n", - " 'ResearchSubject.associated_project',\n", - " 'ResearchSubject.primary_disease_type',\n", - " 'ResearchSubject.primary_disease_site',\n", - " 'ResearchSubject.Diagnosis',\n", - " 'ResearchSubject.Diagnosis.id',\n", - " 'ResearchSubject.Diagnosis.identifier',\n", - " 'ResearchSubject.Diagnosis.identifier.system',\n", - " 'ResearchSubject.Diagnosis.identifier.value',\n", - " 'ResearchSubject.Diagnosis.primary_diagnosis',\n", - " 'ResearchSubject.Diagnosis.age_at_diagnosis',\n", - " 'ResearchSubject.Diagnosis.morphology',\n", - " 'ResearchSubject.Diagnosis.stage',\n", - " 'ResearchSubject.Diagnosis.grade',\n", - " 'ResearchSubject.Diagnosis.Treatment',\n", - " 'ResearchSubject.Diagnosis.Treatment.id',\n", - " 'ResearchSubject.Diagnosis.Treatment.identifier',\n", - " 'ResearchSubject.Diagnosis.Treatment.identifier.system',\n", - " 'ResearchSubject.Diagnosis.Treatment.identifier.value',\n", - " 'ResearchSubject.Diagnosis.Treatment.treatment_type',\n", - " 'ResearchSubject.Diagnosis.Treatment.treatment_outcome',\n", - " 'ResearchSubject.Diagnosis.Treatment.days_to_treatment_start',\n", - " 'ResearchSubject.Diagnosis.Treatment.days_treatment_end',\n", - " 'ResearchSubject.File',\n", - " 'ResearchSubject.File.id',\n", - " 'ResearchSubject.File.identifier',\n", - " 'ResearchSubject.File.identifier.system',\n", - " 'ResearchSubject.File.identifier.value',\n", - " 'ResearchSubject.File.label',\n", - " 'ResearchSubject.File.data_category',\n", - " 'ResearchSubject.File.data_type',\n", - " 'ResearchSubject.File.file_format',\n", - " 'ResearchSubject.File.associated_project',\n", - " 'ResearchSubject.File.drs_uri',\n", - " 'ResearchSubject.File.byte_size',\n", - " 'ResearchSubject.File.checksum',\n", - " 'ResearchSubject.Specimen',\n", - " 'ResearchSubject.Specimen.id',\n", - " 'ResearchSubject.Specimen.identifier',\n", - " 'ResearchSubject.Specimen.identifier.system',\n", - " 'ResearchSubject.Specimen.identifier.value',\n", - " 'ResearchSubject.Specimen.associated_project',\n", - " 'ResearchSubject.Specimen.age_at_collection',\n", - " 'ResearchSubject.Specimen.primary_disease_type',\n", - " 'ResearchSubject.Specimen.anatomical_site',\n", - " 'ResearchSubject.Specimen.source_material_type',\n", - " 'ResearchSubject.Specimen.specimen_type',\n", - " 'ResearchSubject.Specimen.derived_from_specimen',\n", - " 'ResearchSubject.Specimen.derived_from_subject',\n", - " 'ResearchSubject.Specimen.File',\n", - " 'ResearchSubject.Specimen.File.id',\n", - " 'ResearchSubject.Specimen.File.identifier',\n", - " 'ResearchSubject.Specimen.File.identifier.system',\n", - " 'ResearchSubject.Specimen.File.identifier.value',\n", - " 'ResearchSubject.Specimen.File.label',\n", - " 'ResearchSubject.Specimen.File.data_category',\n", - " 'ResearchSubject.Specimen.File.data_type',\n", - " 'ResearchSubject.Specimen.File.file_format',\n", - " 'ResearchSubject.Specimen.File.associated_project',\n", - " 'ResearchSubject.Specimen.File.drs_uri',\n", - " 'ResearchSubject.Specimen.File.byte_size',\n", - " 'ResearchSubject.Specimen.File.checksum']" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "columns(verify=False)" - ] - }, - { - "cell_type": "markdown", - "id": "26229d50", - "metadata": {}, - "source": [ - "All of the above fields are what describes the highest entity in the data structure hierarchy – ```Patient``` entity. The first five fields represent ```Patient``` demographic information, while the ```ResearchSubject``` entity contains details that we are used to seeing within the nodes' ```Case``` record.\n", - "\n", - "One of the contributions of the CDA is aggregated ```ResearchSubject``` information. This means that all ```ResearchSubject``` records coming from the same subject are now gathered under the Patient entity. As we know, certain specimens are studied in multiple projects (being part of a single data node or multiple nodes) as different ```ResearchSubject``` entries. Those ```ResearchSubject``` entries are collected as a list under the ```ResearchSubject``` entity. One example of this is the patient record with ```id = TCGA-E2-A10A``` which contains two ```ResearchSubject``` entries, one from GDC and the other from PDC.\n", - "\n", - "Note that the ```ResearchSubject``` entity is a list of records, as many other entities above are. **There are certain considerations that should be made when creating the queries by using the fields that come from lists, but more about that will follow in examples below**.\n", - "\n", - "The names in the list may look familiar to you, but they may have been renamed or restructured in the CDA. The field name mappings are described in the _CDA Schema Field Mapping_ document that is linked in the _Testing Guide_. A more direct way to explore and understand the fields is to use the ```unique_terms()``` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47da0214", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.Specimen.source_material_type\",limit=10)" - ] - }, - { - "cell_type": "markdown", - "id": "e96d2c77", - "metadata": {}, - "source": [ - "Additionally, you can specify a particular data node by using the ```system``` argument:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f922031e", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms(\"ResearchSubject.Specimen.source_material_type\", system=\"PDC\")" - ] - }, - { - "cell_type": "markdown", - "id": "1f6396f2", - "metadata": {}, - "source": [ - "Now, let's dive into the querying!\n", - "\n", - "We can start by getting the record for ```id = TCGA-E2-A10A``` that we mentioned earlier:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ac11f401", - "metadata": {}, - "outputs": [], - "source": [ - "q = Q('id = \"TCGA-E2-A10A\"') # note the double quotes for the string value\n", - "\n", - "r = q.run()\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3936e82a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "b83e303d", - "metadata": {}, - "source": [ - "We see that we've got a single patient record as a result, which is what we expect.\n", - "\n", - "Let's see how the result looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1aa11a6f", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "r[0]" - ] - }, - { - "cell_type": "markdown", - "id": "e97085f8", - "metadata": {}, - "source": [ - "The record is pretty large, so we'll print out ```identifier``` values for each ```ResearchSubject``` to confirm that we have one ```ResearchSubject``` that comes from GDC, and one that comes from PDC:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88625cfa", - "metadata": {}, - "outputs": [], - "source": [ - "for research_subject in r[0]['ResearchSubject']:\n", - " print(research_subject['identifier'])" - ] - }, - { - "cell_type": "markdown", - "id": "8b48f8d2", - "metadata": {}, - "source": [ - "The values represent ```ResearchSubject``` IDs and are equivalent to ```case_id``` values in data nodes." - ] - }, - { - "cell_type": "markdown", - "id": "3a783bd7", - "metadata": {}, - "source": [ - "## Example queries" - ] - }, - { - "cell_type": "markdown", - "id": "c215820a", - "metadata": {}, - "source": [ - "Now that we can create a query with ```Q()``` function, let's see how we can combine multiple conditions.\n", - "\n", - "There are three operators available:\n", - "* ```And()```\n", - "* ```Or()```\n", - "* ```From()```\n", - "\n", - "The following examples show how those operators work in practice." - ] - }, - { - "cell_type": "markdown", - "id": "77acef39", - "metadata": {}, - "source": [ - "### Query 1\n", - "\n", - "**Find data for subjects who were diagnosed after the age of 50 and who were investigated as part of the TCGA-OV project.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6da17a6", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Diagnosis.age_at_diagnosis > 50*365')\n", - "q2 = Q('ResearchSubject.associated_project = \"TCGA-OV\"')\n", - "\n", - "q = q1.AND(q2)\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "3d13b766", - "metadata": {}, - "source": [ - "### Query 2\n", - "\n", - "**Find data for donors with melanoma (Nevi and Melanomas) diagnosis and who were diagnosed before the age of 30.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6ce3248", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "\n", - "q = q1.AND(q2)\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "7621e34d", - "metadata": {}, - "source": [ - "In addition, we can check how many records come from particular systems by adding one more condition to the query:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "45ad21fd", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = Q('ResearchSubject.Specimen.primary_disease_type = \"Nevi and Melanomas\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.age_at_diagnosis < 30*365')\n", - "q3 = Q('ResearchSubject.Specimen.identifier.system = \"GDC\"')\n", - "\n", - "q = q1.AND(q2.AND(q3))\n", - "r = q.run()\n", - "\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "036bc88d", - "metadata": {}, - "source": [ - "By comparing the ```Count``` value of the two results we can see that all the patients returned in the initial query are coming from the GDC.\n", - "\n", - "To explore the results further, we can fetch the patient JSON objects by iterating through the results:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "df5deb25", - "metadata": {}, - "outputs": [], - "source": [ - "projects = set()\n", - "\n", - "for patient in r:\n", - " research_subjects = patient['ResearchSubject']\n", - " for rs in research_subjects:\n", - " projects.add(rs['associated_project'])\n", - "\n", - "print(projects)" - ] - }, - { - "cell_type": "markdown", - "id": "27df4ffc", - "metadata": {}, - "source": [ - "The output shows the projects where _Nevi and Melanomas_ cases appear." - ] - }, - { - "cell_type": "markdown", - "id": "ee0955c4", - "metadata": {}, - "source": [ - "### Query 3\n", - "\n", - "**Identify all samples that meet the following conditions:**\n", - "\n", - "* **Sample is from primary tumor**\n", - "* **Disease is ovarian or breast cancer**\n", - "* **Subjects are females under the age of 60 years**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e6709c55", - "metadata": {}, - "outputs": [], - "source": [ - "tumor_type = Q('ResearchSubject.Specimen.source_material_type = \"Primary Tumor\"')\n", - "disease1 = Q('ResearchSubject.primary_disease_site = \"Ovary\"')\n", - "disease2 = Q('ResearchSubject.primary_disease_site = \"Breast\"')\n", - "demographics1 = Q('sex = \"female\"')\n", - "demographics2 = Q('days_to_birth > -60*365') # note that days_to_birth is a negative value\n", - "\n", - "q1 = tumor_type.AND(demographics1.AND(demographics2))\n", - "q2 = disease1.OR(disease2)\n", - "q = q1.AND(q2)\n", - "\n", - "r = q.run()\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "22e0e66a", - "metadata": {}, - "source": [ - "In this case, we have a result that contains more than 1000 records which is the default page size. To load the next 1000 records, we can use the ```next_page()``` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c62c6074", - "metadata": {}, - "outputs": [], - "source": [ - "r2 = r.next_page()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0376b0d6", - "metadata": {}, - "outputs": [], - "source": [ - "print(r2)" - ] - }, - { - "cell_type": "markdown", - "id": "72b1f71b", - "metadata": {}, - "source": [ - "Alternatively, we can use the ```offset``` argument to specify the record to start from:\n", - "\n", - "```\n", - "...\n", - "r = q.run(offset=1000)\n", - "print(r)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "bd653b13", - "metadata": {}, - "source": [ - "### Query 4\n", - "\n", - "**Find data for donors with \"Ovarian Serous Cystadenocarcinoma\" with proteomic and genomic data.**" - ] - }, - { - "cell_type": "markdown", - "id": "92e0f930", - "metadata": {}, - "source": [ - "**Note that disease type value denoting the same disease groups can be completely different within different systems. This is where CDA features come into play.** We first start by exploring the values available for this particular field in both systems." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "cd201350", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dboles/Documents/python/working/cda-python/cdapython/decorators_cache.py:18: ResourceWarning: unclosed \n", - " func.cache_clear()\n", - "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n" - ] - }, - { - "data": { - "text/plain": [ - "[None,\n", - " 'Acinar Cell Neoplasms',\n", - " 'Adenomas and Adenocarcinomas',\n", - " 'Adnexal and Skin Appendage Neoplasms',\n", - " 'Basal Cell Neoplasms',\n", - " 'Blood Vessel Tumors',\n", - " 'Chronic Myeloproliferative Disorders',\n", - " 'Complex Epithelial Neoplasms',\n", - " 'Complex Mixed and Stromal Neoplasms',\n", - " 'Cystic, Mucinous and Serous Neoplasms']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "unique_terms('ResearchSubject.primary_disease_type', system=\"GDC\",limit=10,verify=False)" - ] - }, - { - "cell_type": "markdown", - "id": "b3eecdd5", - "metadata": {}, - "source": [ - "Since “Ovarian Serous Cystadenocarcinoma” doesn’t appear in GDC values we decide to look into the PDC:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "706f0301", - "metadata": {}, - "outputs": [], - "source": [ - "unique_terms('ResearchSubject.primary_disease_type', system=\"PDC\")" - ] - }, - { - "cell_type": "markdown", - "id": "3a3476ed", - "metadata": {}, - "source": [ - "After examining the output, we see that it does come from the PDC. Hence, if we could first identify the data that has research subjects found within the PDC that have this particular disease type, and then further narrow down the results to include only the portion of the data that is present in GDC, we could get the records that we are looking for." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "637640e2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Getting results from database\n", - "\n", - "Total execution time: 27307 ms\n", - "\n", - " QueryID: bd084bbd-33bd-4339-b034-b620192922b1\n", - " Query: SELECT all_v2.* FROM (SELECT all_v2.* FROM gdc-bq-sample.integration.all_v2 AS all_v2, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE ((_ResearchSubject.primary_disease_type = 'Ovarian Serous Cystadenocarcinoma') AND (_identifier.system = 'PDC'))) AS all_v2, UNNEST(ResearchSubject) AS _ResearchSubject, UNNEST(_ResearchSubject.identifier) AS _identifier WHERE (_identifier.system = 'GDC')\n", - " Offset: 0\n", - " Count: 100\n", - " Total Row Count: 275\n", - " More pages: True\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dboles/Documents/python/working/cda-python/venv/lib/python3.7/site-packages/ipykernel_launcher.py:6: ResourceWarning: unclosed \n", - " \n", - "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n" - ] - } - ], - "source": [ - "q1 = Q('ResearchSubject.primary_disease_type = \"Ovarian Serous Cystadenocarcinoma\"')\n", - "q2 = Q('ResearchSubject.identifier.system = \"PDC\"')\n", - "q3 = Q('ResearchSubject.identifier.system = \"GDC\"')\n", - "\n", - "q = q3.FROM(q1.AND(q2))\n", - "r = q.run(verify=False)\n", - "\n", - "print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "9ea451f4", - "metadata": {}, - "source": [ - "As you can see, this is achieved by utilizing ```From``` operator. The ```From``` operator allows us to create queries from results of other queries. This is particularly useful when working with conditions that involve a single field which can take multiple different values for different items in a list that is being part of, e.g. we need ```ResearchSubject.identifier.system``` to be both “PDC” and “GDC” for a single patient. In such cases, ```And``` operator can’t help because it will return those entries where the field takes both values, which is zero entries." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "da890a83", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'days_to_birth', 'field_path': 'days_to_birth', 'data_type': 'INT64', 'description': None}\n", - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'race', 'field_path': 'race', 'data_type': 'STRING', 'description': None}\n", - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'sex', 'field_path': 'sex', 'data_type': 'STRING', 'description': None}\n", - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'ethnicity', 'field_path': 'ethnicity', 'data_type': 'STRING', 'description': None}\n", - "{'table_catalog': 'gdc-bq-sample', 'table_schema': 'cda_mvp', 'table_name': 'v3', 'column_name': 'id', 'field_path': 'id', 'data_type': 'STRING', 'description': None}\n" - ] - } - ], - "source": [ - "for i in Q.sql(\"SELECT * FROM `gdc-bq-sample.cda_mvp.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` WHERE table_name = 'v3' Limit 5\",verify=False):\n", - " print(i)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "231e5519", - "metadata": {}, - "outputs": [], - "source": [ - "q1 = query('ResearchSubject.identifier.system = \"GDC\" FROM ResearchSubject.primary_disease_type = \"Ovarian Serous Cystadenocarcinoma\" AND ResearchSubject.identifier.system = \"PDC\"')\n", - "result = q1.run(async_call=True)\n", - "print(result)\n" - ] - }, - { - "cell_type": "markdown", - "id": "716356cc", - "metadata": {}, - "source": [ - "## Data extraction and release information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d420653", - "metadata": {}, - "outputs": [], - "source": [ - "# If you are interested in the extraction dates or data release versions of GDC, PDC, or IDC that is in a table or view, execute this code\n", - "\n", - "for i in Q.sql(\"SELECT option_value FROM `gdc-bq-sample.integration.INFORMATION_SCHEMA.TABLE_OPTIONS` WHERE table_name = 'all_v1'\"):\n", - " print(i)" - ] - }, - { - "cell_type": "markdown", - "id": "e1a235eb", - "metadata": {}, - "source": [ - "## Test queries" - ] - }, - { - "cell_type": "markdown", - "id": "9740d86e", - "metadata": {}, - "source": [ - "Now that we've successfully run and analyzed a few queries, here are a few additional ones you can try out on your own.\n", - "\n", - "Solutions can be shared with the CDA team as indicated in the _Testing Guide_ document." - ] - }, - { - "cell_type": "markdown", - "id": "484df50b", - "metadata": {}, - "source": [ - "### Test Query 1\n", - "\n", - "**Find data from TCGA-BRCA project, with donors over the age of 50 with Stage IIIC cancer.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "76e5ff7c", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "dfebb4f8", - "metadata": {}, - "source": [ - "### Test Query 2\n", - "\n", - "**Find data from all patients who have been treated with \"Radiation Therapy, NOS\" and have both genomic and proteomic data.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8fab696d", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "cell_type": "markdown", - "id": "cf6e1ba8", - "metadata": {}, - "source": [ - "### Test Query 3\n", - "\n", - "**Find data from all subjects with lung adenocarcinomas that have both primary and recurrent tumors.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13b82aef", - "metadata": {}, - "outputs": [], - "source": [ - "# Solution\n", - "\n", - "# ...\n", - "\n", - "# print(r)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a84d6fcc", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "Q('ResearchSubject.id = \"c5421e34-e5c7-4ba5-aed9-146a5575fd8d\"').run().pretty_print(-1) " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10.4 ('venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - }, - "metadata": { - "interpreter": { - "hash": "5c867d1980d5b66d2bfc8a5903dcee074b4b68f3917a4f27f8a310cab24e9f1b" - } - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/testa.ipynb b/notebooks/testa.ipynb deleted file mode 100644 index cca9e32e..00000000 --- a/notebooks/testa.ipynb +++ /dev/null @@ -1,131 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
totalfilessystemsexraceethnicitycause_of_death
0{0: 65}{0: 45342}{0: [{'system': 'GDC', 'count': 65}, {'system'...{0: [{'sex': 'male', 'count': 47}, {'sex': 'fe...{0: [{'race': 'white', 'count': 38}, {'race': ...{0: [{'ethnicity': 'not hispanic or latino', '...{0: [{'cause_of_death': 'Not Reported', 'count...
\n", - "
" - ], - "text/plain": [ - " total files system \\\n", - "0 {0: 65} {0: 45342} {0: [{'system': 'GDC', 'count': 65}, {'system'... \n", - "\n", - " sex \\\n", - "0 {0: [{'sex': 'male', 'count': 47}, {'sex': 'fe... \n", - "\n", - " race \\\n", - "0 {0: [{'race': 'white', 'count': 38}, {'race': ... \n", - "\n", - " ethnicity \\\n", - "0 {0: [{'ethnicity': 'not hispanic or latino', '... \n", - "\n", - " cause_of_death \n", - "0 {0: [{'cause_of_death': 'Not Reported', 'count... " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd \n", - "d = pd.DataFrame(\n", - " [\n", - " {'total': {0: 65},\n", - " 'files': {0: 45342},\n", - " 'system': {0: [{'system': 'GDC', 'count': 65},\n", - " {'system': 'IDC', 'count': 65},\n", - " {'system': 'PDC', 'count': 65}]},\n", - " 'sex': {0: [{'sex': 'male', 'count': 47}, {'sex': 'female', 'count': 18}]},\n", - " 'race': {0: [{'race': 'white', 'count': 38},\n", - " {'race': 'not reported', 'count': 26},\n", - " {'race': 'asian', 'count': 1}]},\n", - " 'ethnicity': {0: [{'ethnicity': 'not hispanic or latino', 'count': 26},\n", - " {'ethnicity': 'not reported', 'count': 35},\n", - " {'ethnicity': 'hispanic or latino', 'count': 4}]},\n", - " 'cause_of_death': {0: [{'cause_of_death': 'Not Reported', 'count': 63},\n", - " {'cause_of_death': 'Cancer Related', 'count': 2}]}}\n", - " \n", - " \n", - " \n", - " ]\n", - "\n", - ")\n", - "\n", - "\n", - "d\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - }, - "vscode": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/testing_sql.ipynb b/notebooks/testing_sql.ipynb deleted file mode 100644 index 06c837e9..00000000 --- a/notebooks/testing_sql.ipynb +++ /dev/null @@ -1,125 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "33d30b80-163c-4b18-a745-6faecdb2d98c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2022.6.28\n", - "everything is fine\n" - ] - } - ], - "source": [ - "from matplotlib import pyplot\n", - "from cdapython import Q \n", - "print(Q.get_version())\n", - "print(Q.bigquery_status())\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "013b417f-674f-47de-a235-66d84c9f4897", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Getting results from database\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "Getting results from database\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total execution time: 3628 ms\n" - ] - }, - { - "data": { - "text/plain": [ - "{'total': {0: 65},\n", - " 'files': {0: 45342},\n", - " 'system': {0: [{'system': 'GDC', 'count': 65},\n", - " {'system': 'IDC', 'count': 65},\n", - " {'system': 'PDC', 'count': 65}]},\n", - " 'sex': {0: [{'sex': 'male', 'count': 47}, {'sex': 'female', 'count': 18}]},\n", - " 'race': {0: [{'race': 'white', 'count': 38},\n", - " {'race': 'not reported', 'count': 26},\n", - " {'race': 'asian', 'count': 1}]},\n", - " 'ethnicity': {0: [{'ethnicity': 'not hispanic or latino', 'count': 26},\n", - " {'ethnicity': 'not reported', 'count': 35},\n", - " {'ethnicity': 'hispanic or latino', 'count': 4}]},\n", - " 'cause_of_death': {0: [{'cause_of_death': 'Not Reported', 'count': 63},\n", - " {'cause_of_death': 'Cancer Related', 'count': 2}]}}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pandas import json_normalize\n", - "q1 = Q('ResearchSubject.Diagnosis.stage = \"Stage I\"')\n", - "q2 = Q('ResearchSubject.Diagnosis.stage = \"Stage II\"')\n", - "q3 = Q(\"ResearchSubject.primary_diagnosis_site = 'Kidney'\")\n", - "q_diag = q1.OR(q2)\n", - "q = q_diag.AND(q3)\n", - "# print(q.counts.run())\n", - "qsub = q.subject.count.run(show_sql=True)\n", - "%matplotlib inline\n", - "qsub.to_dataframe().to_dict()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4f5cd0f-b990-4096-a91f-c84997019d39", - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "4578c8680ee810f847df558484335f5ffb0f004d38a87276387030f59580c508" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.14" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}