-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
2,839 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,242 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<img src=\"../fasp/runner/credits/images/FASPNotebook09.jpg\" style=\"float: right;\">\n", | ||
"\n", | ||
"### BioDataCatalyst and CRDC Search-Locate-Compute\n", | ||
"\n", | ||
"* Search:\n", | ||
" * CRDC - GECCO Data\n", | ||
" * BioDataCatalyst - COPDGene\n", | ||
"* Locate:\n", | ||
" * CRDC DRS Service\n", | ||
" * BioDataCatalyst DRS Service\n", | ||
"* Compute: \n", | ||
" * CRDC - Seven Bridges CGC\n", | ||
" * ISB-CGC (Google Cloud)\n", | ||
"\n", | ||
"This copy of the FASPNotebook09 was created for the GA4GH Connect meeting April 2023." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from fasp.runner import FASPRunner\n", | ||
"\n", | ||
"# The implementations we're using\n", | ||
"from fasp.loc import DRSMetaResolver\n", | ||
"from fasp.workflow import GCPLSsamtools, sbcgcWESClient\n", | ||
"from fasp.search import BigQuerySearchClient, DataConnectClient\n", | ||
"\n", | ||
"faspRunner = FASPRunner(program='FASPNotebook09.ipynb')\n", | ||
"runNote = 'Two dbGaP Sources, CRDC and BioDataCatalyst'" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Step 1 - Discovery\n", | ||
"Query for relevant DRS objects" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Searching the GA4GH registry for org.ga4gh:drs services\n", | ||
"Running query\n", | ||
"SELECT sp.dbGaP_Subject_ID,\n", | ||
"'sbcgc:'||sb_drs_id FROM collections.public_datasets.dbgap_scr_gecco_susceptibility_subject_phenotypes_multi sp\n", | ||
"join collections.public_datasets.dbgap_scr_gecco_susceptibility_sample_multi sm on sm.dbgap_subject_id = sp.dbgap_subject_id\n", | ||
"join collections.public_datasets.dbgap_scr_gecco_susceptibility_sb_drs_index di on di.sample_id = sm.sample_id \n", | ||
"where AGE between 45 and 55 and sex = 'Female' and file_type = 'cram' limit 3\n", | ||
"_Retrieving the query_\n", | ||
"____Page1_______________\n", | ||
"____Page2_______________\n", | ||
"____Page3_______________\n", | ||
"____Page4_______________\n", | ||
"____Page5_______________\n", | ||
"____Page6_______________\n", | ||
"subject=2474312, drsID=sbcgc:5baa8cece4b0db63859e6590\n", | ||
"workflow submitted, run:cb4713f5-568e-4019-b86b-a3a27140dc8e\n", | ||
"____________________________________________________________\n", | ||
"subject=2473610, drsID=sbcgc:5baa8d0be4b0db63859e6843\n", | ||
"workflow submitted, run:48d44c09-6d90-4318-81e1-16b5fe5b1d68\n", | ||
"____________________________________________________________\n", | ||
"subject=2474054, drsID=sbcgc:5baa8b9de4b0db63859e5f33\n", | ||
"workflow submitted, run:61b28168-9b21-4f1a-aa3d-3d0a97140cfa\n", | ||
"____________________________________________________________\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# TCGA Query - CRDC\n", | ||
"crdcquery = '''SELECT sp.dbGaP_Subject_ID,\n", | ||
"'sbcgc:'||sb_drs_id \n", | ||
"\n", | ||
"FROM dbgap_demo.scr_gecco_susceptibility.subject_phenotypes_multi sp\n", | ||
"\n", | ||
"join dbgap_demo.scr_gecco_susceptibility.sample_multi sm on sm.dbgap_subject_id = sp.dbgap_subject_id\n", | ||
"\n", | ||
"join dbgap_demo.scr_gecco_susceptibility.sb_drs_index di on di.sample_id = sm.sample_id \n", | ||
"\n", | ||
"where AGE between 45 and 55 and sex = 'Female' and file_type = 'cram' limit 3'''\n", | ||
"\n", | ||
"\n", | ||
"bdcquery = '''\n", | ||
" SELECT sp.dbGaP_Subject_ID, 'bdc:'||read_drs_id\n", | ||
"\n", | ||
"FROM `isbcgc-216220.COPDGene.Subject_MULTI` sm\n", | ||
"\n", | ||
"join `isbcgc-216220.COPDGene.Subject_Phenotypes_HMB` sp on sp.dbgap_subject_id = sm.dbgap_subject_id\n", | ||
"\n", | ||
"join `isbcgc-216220.COPDGene.COPD_DRS` drs on drs.su_submitter_id = sm.subject_id\n", | ||
"\n", | ||
"where gender = '2' and Age_Enroll between 45 and 55\n", | ||
"\n", | ||
"LIMIT 3'''\n", | ||
"\n", | ||
"searchClient = DataConnectClient('https://publisher-data.publisher.dnastack.com/data-connect/')\n", | ||
"drsClient = DRSMetaResolver()\n", | ||
"\n", | ||
"# Step 3 - set up a class that runs samtools for us\n", | ||
"# providing the location for the resultssettings = faspRunner.settings\n", | ||
"settings = faspRunner.settings\n", | ||
"wesClient = sbcgcWESClient(settings['SevenBridgesProject'])\n", | ||
"\n", | ||
"faspRunner.configure(searchClient, drsClient, wesClient)\n", | ||
"runList = faspRunner.runQuery(crdcquery, runNote)" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### BioDataCatalyst runs\t" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Running query\n", | ||
"\n", | ||
" SELECT sp.dbGaP_Subject_ID, 'bdc:'||read_drs_id\n", | ||
" FROM `isbcgc-216220.COPDGene.Subject_MULTI` sm\n", | ||
" join `isbcgc-216220.COPDGene.Subject_Phenotypes_HMB` sp on sp.dbgap_subject_id = sm.dbgap_subject_id\n", | ||
" join `isbcgc-216220.COPDGene.COPD_DRS` drs on drs.su_submitter_id = sm.subject_id\n", | ||
" where gender = '2'\n", | ||
" and Age_Enroll between 45 and 55\n", | ||
" LIMIT 3\n", | ||
"subject=599754, drsID=bdc:dg.4503/22b61280-5d92-413b-bca6-ae674ee16d14\n", | ||
"workflow submitted, run:10796449958580678646\n", | ||
"____________________________________________________________\n", | ||
"subject=600588, drsID=bdc:dg.4503/e5529a06-5022-42f1-bddb-761d34d2f4f1\n", | ||
"workflow submitted, run:2497755560539929518\n", | ||
"____________________________________________________________\n", | ||
"subject=595520, drsID=bdc:dg.4503/35599dbc-e531-4a83-97a4-6a74587d2a78\n", | ||
"workflow submitted, run:17902599479779506025\n", | ||
"____________________________________________________________\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"searchClient = BigQuerySearchClient()\n", | ||
"gcplocation = 'projects/{}/locations/{}'.format(settings['GCPProject'], settings['GCPPipelineRegion'])\n", | ||
"wesClient = GCPLSsamtools(gcplocation, settings['GCPOutputBucket'])\n", | ||
"\n", | ||
"\n", | ||
"faspRunner.configure(searchClient, drsClient, wesClient)\n", | ||
"runList = faspRunner.runQuery(bdcquery, runNote)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAHgAAAA8CAIAAAAiz+n/AAAAw0lEQVR4nO3bsQ3DMBAEQdL1qP9K1M+7AgdKViA8U8FjAyYH7rVmHWVm3ff99hXPXNf1efuGfyF0ROiI0BGhI0JHhI4IHRE6InRE6IjQEaEjQkeEjggdEToidGTPHDZlHWqfuBnu/fYRD814OipCR4SOCB0ROiJ0ROiI0BGhI0JHhI4IHRE6InRE6IjQEaEjQkdshpEjN0P/DPlJ6IjQEaEjQkeEjggdEToidEToiNARoSNCR4SOCB0ROiJ0ROiIzTDyBR7CGmFwt+geAAAAAElFTkSuQmCC", | ||
"text/plain": [ | ||
"<PIL.Image.Image image mode=RGB size=120x60 at 0x11D87E940>" | ||
] | ||
}, | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"faspRunner.getFASPicon()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"DataConnectClient BigQuerySearchClient \n", | ||
"DRSMetaResolver DRSMetaResolver \n", | ||
"sbcgcWESClient GCPLSsamtools \n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"faspRunner.rollCredits()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.20" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Oops, something went wrong.