Skip to content
This repository has been archived by the owner on Mar 13, 2024. It is now read-only.

Commit

Permalink
Merge pull request #2 from vanallenlab/2021-08-11
Browse files Browse the repository at this point in the history
2021 July 7 release
  • Loading branch information
brendanreardon authored Aug 11, 2021
2 parents 1775b1f + b66f443 commit 4fc8f3a
Show file tree
Hide file tree
Showing 241 changed files with 239,741 additions and 91,013 deletions.
52 changes: 52 additions & 0 deletions analyses/knowledge-bases/civic-oncokb/create-source-data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import openpyxl\n",
"\n",
"# Build the source-data workbook for Extended Data Fig. 4: one worksheet per panel,\n",
"# each copied from the matching tab-separated '<handle>-comparison.txt' file.\n",
"folder = \"../../../extended-data/extended-data-fig-4/\"\n",
"filename = \"source-data-extended-data-fig-4.xlsx\"\n",
"\n",
"# (input file prefix, panel letter used in the worksheet name)\n",
"tuples = [('pmid', 'a'), ('drug', 'b'), ('gene', 'c')]\n",
"\n",
"# Use the writer as a context manager so the workbook is saved and its handle closed\n",
"# on exit, even if a read or write raises. ExcelWriter.save() was deprecated in\n",
"# pandas 1.5 and removed in pandas 2.0.\n",
"with pd.ExcelWriter(f'{folder}/{filename}', engine='xlsxwriter') as writer:\n",
"    for handle, label in tuples:\n",
"        df = pd.read_csv(f\"{handle}-comparison.txt\", sep='\\t')\n",
"        df.to_excel(writer, sheet_name=f\"Extended Data Fig. 4{label}\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "moalmanac-paper",
"language": "python",
"name": "moalmanac-paper"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
5 changes: 0 additions & 5 deletions analyses/knowledge-bases/civic-oncokb/move-to-folders.sh

This file was deleted.

36 changes: 27 additions & 9 deletions analyses/knowledge-bases/civic-oncokb/upset.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,35 @@ drugs <- read.csv('drug-comparison.txt', sep='\t', header=T)
genes <- read.csv('gene-comparison.txt', sep='\t', header=T)
pmids <- read.csv('pmid-comparison.txt', sep='\t', header=T)

png('~/Github/moalmanac-paper/figures/supplementary-figure-13/supplementary-figure-13.png', width=800, height=600)
upset(pmids, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=2)
grid.text("Comparison of PubMed IDs catalogued by\nCIViC, MOAlmanac, and OncoKB",x = 0.65, y=0.95, gp=gpar(fontsize=20))
## PDF
pdf("~/Github/moalmanac-paper/extended-data-figures/extended-data-fig-4/extended-data-fig-4a.pdf", paper='a4')
upset(pmids, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=1.5,
mainbar.y.label="PubMed IDs\nintersection size")
dev.off()

png('~/Github/moalmanac-paper/figures/supplementary-figure-14/supplementary-figure-14.png', width=800, height=600)
upset(drugs, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=2)
grid.text("Comparison of therapies catalogued by\nCIViC, MOAlmanac, and OncoKB",x = 0.65, y=0.95, gp=gpar(fontsize=20))
pdf("~/Github/moalmanac-paper/extended-data-figures/extended-data-fig-4/extended-data-fig-4b.pdf", paper='a4')
upset(drugs, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=1.5,
mainbar.y.label="Therapies\nintersection size")
dev.off()

png('~/Github/moalmanac-paper/figures/supplementary-figure-15/supplementary-figure-15.png', width=800, height=600)
upset(genes, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=2)
grid.text("Comparison of genes catalogued by\nCIViC, MOAlmanac, and OncoKB",x = 0.65, y=0.95, gp=gpar(fontsize=20))
pdf("~/Github/moalmanac-paper/extended-data-figures/extended-data-fig-4/extended-data-fig-4c.pdf", paper='a4')
upset(genes, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=1.5,
mainbar.y.label="Genes\nintersection size")
dev.off()

## PNG
# PNG versions of the three Extended Data Fig. 4 panels. Each panel follows the
# same three steps: open a png() device (res=300, width=1000, height=750), draw
# one UpSet plot of the CIViC / MOAlmanac / OncoKB sets ordered by intersection
# frequency, then close the device with dev.off() before the next panel opens.
# NOTE(review): output paths are hard-coded under ~/Github/moalmanac-paper, so
# the script only works when the repository is checked out at that location.

# Panel a: PubMed IDs (data frame read from pmid-comparison.txt).
png("~/Github/moalmanac-paper/extended-data-figures/extended-data-fig-4/extended-data-fig-4a.png", res=300, width=1000, height=750)
upset(pmids, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=1,
mainbar.y.label="PubMed IDs\nintersection size")
dev.off()

# Panel b: therapies (data frame read from drug-comparison.txt).
png("~/Github/moalmanac-paper/extended-data-figures/extended-data-fig-4/extended-data-fig-4b.png", res=300, width=1000, height=750)
upset(drugs, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=1,
mainbar.y.label="Therapies\nintersection size")
dev.off()

# Panel c: genes (data frame read from gene-comparison.txt).
png("~/Github/moalmanac-paper/extended-data-figures/extended-data-fig-4/extended-data-fig-4c.png", res=300, width=1000, height=750)
upset(genes, sets = c("CIViC", "MOAlmanac", "OncoKB"), sets.bar.color = "#56B4E9", order.by = "freq", text.scale=1,
mainbar.y.label="Genes\nintersection size")
dev.off()

2 changes: 0 additions & 2 deletions analyses/knowledge-bases/count-genes/README.md

This file was deleted.

97 changes: 0 additions & 97 deletions analyses/knowledge-bases/count-genes/count-genes.ipynb

This file was deleted.

4 changes: 2 additions & 2 deletions analyses/knowledge-bases/molecular-oncology-almanac/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Molecular Oncology Almanac
These notebooks perform calculations and generate a few figures.
- `count-combination-therapies.ipynb` counts the combination therapies present in the database
- `create-supplementary-table.ipynb` creates supplementary table 1, which lists an TARGET annotated version of TARGET and all catalogued entries in MOAlmanac
- `illustrate-moalmanac-breakdown.ipynb` creates a figure, Figure 1c
- `create-supplementary-table.ipynb` creates supplementary table 2, which lists an annotated version of TARGET and all catalogued entries in MOAlmanac
- `illustrate-moalmanac-breakdown.ipynb` creates a figure, Fig. 1c
Original file line number Diff line number Diff line change
Expand Up @@ -29,32 +29,110 @@
"import pandas as pd\n",
"import glob \n",
"\n",
"filename = 'supplementary-table-2'\n",
"folder = f'../../../tables/supplementary-table-02'\n",
"!mkdir -p $folder\n",
"outname = f'{folder}/{filename}.xlsx'\n",
"\n",
"options = {}\n",
"options['strings_to_formulas'] = False\n",
"options['strings_to_urls'] = False\n",
"\n",
"writer = pd.ExcelWriter(outname)\n",
"column_order = ['disease', \n",
" 'context', \n",
" 'oncotree_term', \n",
" 'oncotree_code', \n",
" 'therapy_name', \n",
" 'therapy_strategy', \n",
" 'therapy_type', \n",
" 'therapy_sensitivity',\n",
" 'therapy_resistance', \n",
" 'favorable_prognosis', \n",
" 'predictive_implication',\n",
" 'description', \n",
" 'preferred_assertion', \n",
" 'source_type', \n",
" 'citation', \n",
" 'url', \n",
" 'doi',\n",
" 'pmid', \n",
" 'nct', \n",
" 'last_updated', \n",
" 'adverse_event_risk', \n",
" 'feature_type', \n",
" 'gene', \n",
" 'chromosome', \n",
" 'start_position', \n",
" 'end_position', \n",
" 'reference_allele', \n",
" 'alternate_allele', \n",
" 'cdna_change', \n",
" 'protein_change', \n",
" 'variant_annotation', \n",
" 'exon', \n",
" 'rsid', \n",
" 'pathogenic', \n",
" 'direction', \n",
" 'cytoband',\n",
" 'gene1', \n",
" 'gene2', \n",
" 'rearrangement_type', \n",
" 'classification', \n",
" 'minimum_mutations', \n",
" 'mutations_per_mb',\n",
" #'minimum_neoantigens',\n",
" 'cosmic_signature_number',\n",
" 'cosmic_signature_version',\n",
" 'event', \n",
" 'locus', \n",
" 'status', \n",
" 'technique'\n",
"]\n",
"\n",
"target = pd.read_excel('../target/almanac-comparison.xlsx')\n",
"target.to_excel(writer, sheet_name='TARGET')\n",
"dtype_map = {\n",
" 'aneuploidy': 'Aneuploidy',\n",
" 'copy_number': 'Copy Number',\n",
" 'germline_variant': 'Germline variant',\n",
" 'knockdown': 'Knockdown',\n",
" 'microsatellite_stability': 'Microsatellite stability',\n",
" 'mutational_burden': 'Mutational burden',\n",
" 'mutational_signature': 'Mutational signature',\n",
" 'neoantigen_burden': 'Neoantigen burden',\n",
" 'rearrangement': 'Rearrangement',\n",
" 'silencing': 'Silencing',\n",
" 'somatic_variant': 'Somatic variant'\n",
"}\n",
"\n",
"sheet_names = []\n",
"sheets = []\n",
"handles = glob.glob('../moalmanac-db/content/*')\n",
"for handle in sorted(handles):\n",
" dtype = handle.split('/')[-1].split('.tsv')[0]\n",
" if dtype == 'microsatellite_stability':\n",
" dtype = 'msi'\n",
" sheet_name = f'moalmanac-{dtype}'\n",
" sheet_names.append(sheet_name)\n",
" \n",
" feature_type = dtype_map[dtype] \n",
" tmp = pd.read_csv(handle, sep='\\t')\n",
" tmp.to_excel(writer, sheet_name=sheet_name, index=False)\n",
" \n",
" tmp['feature_type'] = feature_type\n",
" sheets.append(tmp)\n",
"\n",
"\n",
"sort_columns = ['gene', 'gene1', 'gene2', 'therapy_sensitivity', 'therapy_resistance', 'favorable_prognosis', 'disease', 'therapy_name']\n",
"df = (pd\n",
" .concat(sheets, ignore_index=True)\n",
" .loc[:, column_order]\n",
" .sort_values(sort_columns, ascending=True)\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Write Supplementary Table 2: the TARGET comparison sheet plus the combined\n",
"# MOAlmanac entries (`df`, built in the previous cell).\n",
"filename = 'supplementary-table-2'\n",
"folder = '../../../tables/supplementary-table-02'\n",
"!mkdir -p $folder\n",
"outname = f'{folder}/{filename}.xlsx'\n",
"\n",
"# Read the input before opening the writer so a failed read leaves no open workbook.\n",
"target = pd.read_excel('../target/almanac-comparison.xlsx')\n",
"\n",
"# The context manager saves the workbook and closes its handle on exit, even if a\n",
"# write raises. ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0.\n",
"with pd.ExcelWriter(outname) as writer:\n",
"    target.to_excel(writer, sheet_name='TableS2-TARGET', index=False)\n",
"    df.to_excel(writer, sheet_name='TableS2-MOAlmanac', index=False)"
]
},
Expand Down
Loading

0 comments on commit 4fc8f3a

Please sign in to comment.