diff --git a/proxbias/data/AdamsonWeissman2016_GSM2406681_10X010_b5_w100_n150.csv b/proxbias/data/AdamsonWeissman2016_GSM2406681_10X010_b5_w100_n150.csv index de990d1..cfc19e0 100644 --- a/proxbias/data/AdamsonWeissman2016_GSM2406681_10X010_b5_w100_n150.csv +++ b/proxbias/data/AdamsonWeissman2016_GSM2406681_10X010_b5_w100_n150.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd798bb2fe209b88d2a17ec8c69073c2697bb212e72df751f61c770b791c7565 -size 2629028 +oid sha256:62d838e9ddceaa2dfead2b923c7a8f6a7f3399c834147d3876c570a5c2f3ef54 +size 2420829 diff --git a/proxbias/data/FrangiehIzar2021_RNA_b5_w100_n150.csv b/proxbias/data/FrangiehIzar2021_RNA_b5_w100_n150.csv index 68c9d52..27d2c56 100644 --- a/proxbias/data/FrangiehIzar2021_RNA_b5_w100_n150.csv +++ b/proxbias/data/FrangiehIzar2021_RNA_b5_w100_n150.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb437c759c2f2f5b7ea2a65bf89cdb7054029af07dc368bd158df1de70d7e281 -size 24452228 +oid sha256:dd9e537fde4c15875380da29e891fdff9c581fa2b99e3883201d636043f43522 +size 21478256 diff --git a/proxbias/data/PapalexiSatija2021_eccite_RNA_b5_w100_n150.csv b/proxbias/data/PapalexiSatija2021_eccite_RNA_b5_w100_n150.csv index 68adb84..533582a 100644 --- a/proxbias/data/PapalexiSatija2021_eccite_RNA_b5_w100_n150.csv +++ b/proxbias/data/PapalexiSatija2021_eccite_RNA_b5_w100_n150.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:947966b684c34efa1f58baa901927c482ba98a80e7e2bf7130702788fe0a7285 -size 323105 +oid sha256:1d4c75e31a76d1b380071801c9bef342a043bc2a50eccd2eb63735317b0565c2 +size 274862 diff --git a/proxbias/data/ReplogleWeissman2022_rpe1_b5_w100_n150.csv b/proxbias/data/ReplogleWeissman2022_rpe1_b5_w100_n150.csv index 1f43e97..dd7a735 100644 --- a/proxbias/data/ReplogleWeissman2022_rpe1_b5_w100_n150.csv +++ b/proxbias/data/ReplogleWeissman2022_rpe1_b5_w100_n150.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b773f6f5309dfe0407970993b73939fbc4fb445105941504fb98c0afc13e83f -size 606829502 +oid sha256:56331763d8ddc27891afc7dfaa87b9136ea240c834db902ce8b7d8f80ba12f28 +size 636804036 diff --git a/proxbias/data/TianKampmann2021_CRISPRi_b5_w100_n150.csv b/proxbias/data/TianKampmann2021_CRISPRi_b5_w100_n150.csv index 0a5c176..e562866 100644 --- a/proxbias/data/TianKampmann2021_CRISPRi_b5_w100_n150.csv +++ b/proxbias/data/TianKampmann2021_CRISPRi_b5_w100_n150.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21072eee3292c09d6b437c4efee8d16787e70de6b2211c4ff2983309c60483d9 -size 3825276 +oid sha256:324f94283c3fd6b232a44812e15c1a547a9d659a4cf681e778cdf380ea72ad28 +size 3919389 diff --git a/proxbias/notebooks/scPerturb_tables_heatmaps.ipynb b/proxbias/notebooks/scPerturb_tables_heatmaps.ipynb index 32ffaac..7ebaa71 100644 --- a/proxbias/notebooks/scPerturb_tables_heatmaps.ipynb +++ b/proxbias/notebooks/scPerturb_tables_heatmaps.ipynb @@ -5,7 +5,7 @@ "id": "631365de", "metadata": {}, "source": [ - "## Generate the tables with chromosomal loss information for all five scPerturb datasets (2 CRISPR-cas9 and 3 CRISPRi)" + "## Generate the tables with chromosomal loss information for all five scPerturb datasets (2 CRISPR-cas9 and 3 CRISPRi datasets)" ] }, { @@ -15,7 +15,16 @@ "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/safiye.celik/.pyenv/versions/3.11.5/envs/prox/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from proxbias import scPerturb_processing_plotting as sc\n", "import pandas as pd" @@ -28,7 +37,62 @@ "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ReplogleWeissman2022_rpe1\n", + "WARNING: Skipped 213 genes because they don't have a genomic position annotated. \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 50/50 [00:55<00:00, 1.11s/it]\n", + "/Users/safiye.celik/.pyenv/versions/3.11.5/envs/prox/lib/python3.11/site-packages/infercnvpy/tl/_infercnv.py:129: ImplicitModificationWarning: Setting element `.obsm['X_cnv']` of view, initializing view as actual.\n", + " adata.obsm[f\"X_{key_added}\"] = res\n", + "100%|██████████| 2066/2066 [15:25<00:00, 2.23it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TianKampmann2021_CRISPRi\n", + "WARNING: Skipped 11677 genes because they don't have a genomic position annotated. \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:11<00:00, 1.67s/it]\n", + "/Users/safiye.celik/.pyenv/versions/3.11.5/envs/prox/lib/python3.11/site-packages/infercnvpy/tl/_infercnv.py:129: ImplicitModificationWarning: Setting element `.obsm['X_cnv']` of view, initializing view as actual.\n", + " adata.obsm[f\"X_{key_added}\"] = res\n", + "100%|██████████| 177/177 [00:03<00:00, 58.39it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AdamsonWeissman2016_GSM2406681_10X010\n", + "WARNING: Skipped 13481 genes because they don't have a genomic position annotated. \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 14/14 [00:20<00:00, 1.44s/it]\n", + "/Users/safiye.celik/.pyenv/versions/3.11.5/envs/prox/lib/python3.11/site-packages/infercnvpy/tl/_infercnv.py:129: ImplicitModificationWarning: Setting element `.obsm['X_cnv']` of view, initializing view as actual.\n", + " adata.obsm[f\"X_{key_added}\"] = res\n", + "100%|██████████| 78/78 [00:01<00:00, 57.67it/s]\n" + ] + } + ], "source": [ "sc.generate_specific_loss_and_summary_tables(\n", " [\n", @@ -92,51 +156,51 @@ " \n", " \n", " 0\n", - " POU2F2\n", + " IFNGR1\n", " CRISPR-cas9\n", " Papalexi\n", - " chr19q\n", + " chr6q\n", " 3'\n", - " 661\n", - " 34.0\n", - " 5.14\n", + " 1342\n", + " 71.0\n", + " 5.29\n", " telomere\n", " \n", " \n", " 1\n", - " STAT2\n", + " POU2F2\n", " CRISPR-cas9\n", " Papalexi\n", - " chr12q\n", + " chr19q\n", " 3'\n", - " 660\n", - " 31.0\n", - " 4.69\n", + " 662\n", + " 29.0\n", + " 4.38\n", " telomere\n", " \n", " \n", " 2\n", - " IFNGR1\n", + " STAT2\n", " CRISPR-cas9\n", " Papalexi\n", - " chr6q\n", + " chr12q\n", " 3'\n", - " 1341\n", - " 62.0\n", - " 4.62\n", + " 660\n", + " 28.0\n", + " 4.24\n", " telomere\n", " \n", " \n", " 3\n", - " IFNGR2\n", + " IRF7\n", " CRISPR-cas9\n", " Papalexi\n", - " chr21q\n", + " chr11p\n", " 3'\n", - " 1214\n", - " 38.0\n", - " 3.13\n", - " telomere\n", + " 614\n", + " 19.0\n", + " 3.09\n", + " centromere\n", " \n", " \n", " 4\n", @@ -145,9 +209,9 @@ " Papalexi\n", " chr11q\n", " 3'\n", - " 518\n", - " 14.0\n", - " 2.70\n", + " 517\n", + " 12.0\n", + " 2.32\n", " telomere\n", " \n", " \n", @@ -163,31 +227,19 @@ " ...\n", " \n", " \n", - " 156\n", - " ATXN1\n", - " CRISPRi\n", - " Tian\n", - " chr6p\n", - " 5'\n", - " 187\n", - " 4.0\n", - " 2.13\n", - " telomere\n", - " \n", - " \n", - " 157\n", - " THAP1\n", + " 167\n", + " PRKAG2\n", " CRISPRi\n", " Tian\n", - " chr8p\n", + " chr7q\n", " 5'\n", - " 142\n", + " 119\n", " 2.0\n", - " 1.40\n", - " telomere\n", + " 1.68\n", + " centromere\n", " \n", " \n", - " 158\n", + " 168\n", " FAM171A2\n", " CRISPRi\n", " Tian\n", @@ -199,75 +251,87 @@ " centromere\n", " \n", " \n", - " 159\n", - " RHOT1\n", + " 169\n", + " XBP1\n", " CRISPRi\n", - " Tian\n", - " chr17q\n", + " Adamson\n", + " chr22q\n", + " 3'\n", + " 61\n", + " 2.0\n", + " 3.23\n", + " telomere\n", + " \n", + " \n", + " 170\n", + " XBP1\n", + " CRISPRi\n", + " Adamson\n", + " chr22q\n", " 5'\n", - " 175\n", - " 1.0\n", - " 0.57\n", + " 62\n", + " 5.0\n", + " 8.06\n", " centromere\n", " \n", " \n", - " 160\n", - " SRP72\n", + " 171\n", + " DERL2\n", " CRISPRi\n", " Adamson\n", - " chr4q\n", - " 3'\n", - " 518\n", - " 7.0\n", - " 1.35\n", + " chr17p\n", + " 5'\n", + " 654\n", + " 11.0\n", + " 1.68\n", " telomere\n", " \n", " \n", "\n", - "

161 rows × 9 columns

\n", + "

172 rows × 9 columns

\n", "" ], "text/plain": [ " Perturbed gene Perturbation type Dataset Chr arm Tested loss direction \\\n", - "0 POU2F2 CRISPR-cas9 Papalexi chr19q 3' \n", - "1 STAT2 CRISPR-cas9 Papalexi chr12q 3' \n", - "2 IFNGR1 CRISPR-cas9 Papalexi chr6q 3' \n", - "3 IFNGR2 CRISPR-cas9 Papalexi chr21q 3' \n", + "0 IFNGR1 CRISPR-cas9 Papalexi chr6q 3' \n", + "1 POU2F2 CRISPR-cas9 Papalexi chr19q 3' \n", + "2 STAT2 CRISPR-cas9 Papalexi chr12q 3' \n", + "3 IRF7 CRISPR-cas9 Papalexi chr11p 3' \n", "4 UBE2L6 CRISPR-cas9 Papalexi chr11q 3' \n", ".. ... ... ... ... ... \n", - "156 ATXN1 CRISPRi Tian chr6p 5' \n", - "157 THAP1 CRISPRi Tian chr8p 5' \n", - "158 FAM171A2 CRISPRi Tian chr17q 5' \n", - "159 RHOT1 CRISPRi Tian chr17q 5' \n", - "160 SRP72 CRISPRi Adamson chr4q 3' \n", + "167 PRKAG2 CRISPRi Tian chr7q 5' \n", + "168 FAM171A2 CRISPRi Tian chr17q 5' \n", + "169 XBP1 CRISPRi Adamson chr22q 3' \n", + "170 XBP1 CRISPRi Adamson chr22q 5' \n", + "171 DERL2 CRISPRi Adamson chr17p 5' \n", "\n", " Total # cells # affected cells % affected cells \\\n", - "0 661 34.0 5.14 \n", - "1 660 31.0 4.69 \n", - "2 1341 62.0 4.62 \n", - "3 1214 38.0 3.13 \n", - "4 518 14.0 2.70 \n", + "0 1342 71.0 5.29 \n", + "1 662 29.0 4.38 \n", + "2 660 28.0 4.24 \n", + "3 614 19.0 3.09 \n", + "4 517 12.0 2.32 \n", ".. ... ... ... \n", - "156 187 4.0 2.13 \n", - "157 142 2.0 1.40 \n", - "158 151 1.0 0.66 \n", - "159 175 1.0 0.57 \n", - "160 518 7.0 1.35 \n", + "167 119 2.0 1.68 \n", + "168 151 1.0 0.66 \n", + "169 61 2.0 3.23 \n", + "170 62 5.0 8.06 \n", + "171 654 11.0 1.68 \n", "\n", " Towards telomere or centromere \n", "0 telomere \n", "1 telomere \n", "2 telomere \n", - "3 telomere \n", + "3 centromere \n", "4 telomere \n", ".. ... \n", - "156 telomere \n", - "157 telomere \n", - "158 centromere \n", - "159 centromere \n", - "160 telomere \n", + "167 centromere \n", + "168 centromere \n", + "169 telomere \n", + "170 centromere \n", + "171 telomere \n", "\n", - "[161 rows x 9 columns]" + "[172 rows x 9 columns]" ] }, "metadata": {}, @@ -333,10 +397,10 @@ " Frangieh\n", " 237\n", " 3'\n", - " 11.8\n", - " 28\n", - " 22\n", - " 6\n", + " 13.1\n", + " 31\n", + " 23\n", + " 8\n", " \n", " \n", " 1\n", @@ -344,10 +408,10 @@ " Frangieh\n", " 237\n", " 5'\n", - " 14.8\n", - " 35\n", - " 22\n", - " 13\n", + " 14.3\n", + " 34\n", + " 20\n", + " 14\n", " \n", " \n", " 2\n", @@ -355,10 +419,10 @@ " Papalexi\n", " 24\n", " 3'\n", - " 33.3\n", - " 8\n", + " 25.0\n", " 6\n", - " 2\n", + " 5\n", + " 1\n", " \n", " \n", " 3\n", @@ -366,10 +430,10 @@ " Papalexi\n", " 24\n", " 5'\n", - " 8.3\n", - " 2\n", + " 4.2\n", " 1\n", " 1\n", + " 0\n", " \n", " \n", " 4\n", @@ -385,45 +449,56 @@ " \n", " 5\n", " CRISPRi\n", + " Adamson\n", + " 78\n", + " 5'\n", + " 2.6\n", + " 2\n", + " 1\n", + " 1\n", + " \n", + " \n", + " 6\n", + " CRISPRi\n", " Replogle\n", " 2066\n", " 3'\n", - " 1.8\n", - " 38\n", - " 27\n", - " 11\n", + " 2.2\n", + " 45\n", + " 31\n", + " 14\n", " \n", " \n", - " 6\n", + " 7\n", " CRISPRi\n", " Replogle\n", " 2066\n", " 5'\n", - " 2.0\n", - " 42\n", - " 14\n", - " 28\n", + " 2.2\n", + " 45\n", + " 11\n", + " 34\n", " \n", " \n", - " 7\n", + " 8\n", " CRISPRi\n", " Tian\n", " 177\n", " 3'\n", - " 1.1\n", - " 2\n", - " 1\n", - " 1\n", + " 1.7\n", + " 3\n", + " 3\n", + " 0\n", " \n", " \n", - " 8\n", + " 9\n", " CRISPRi\n", " Tian\n", " 177\n", " 5'\n", - " 2.8\n", - " 5\n", - " 3\n", + " 2.3\n", + " 4\n", + " 2\n", " 2\n", " \n", " \n", @@ -437,32 +512,35 @@ "2 CRISPR-cas9 Papalexi 24 3' \n", "3 CRISPR-cas9 Papalexi 24 5' \n", "4 CRISPRi Adamson 78 3' \n", - "5 CRISPRi Replogle 2066 3' \n", - "6 CRISPRi Replogle 2066 5' \n", - "7 CRISPRi Tian 177 3' \n", - "8 CRISPRi Tian 177 5' \n", + "5 CRISPRi Adamson 78 5' \n", + "6 CRISPRi Replogle 2066 3' \n", + "7 CRISPRi Replogle 2066 5' \n", + "8 CRISPRi Tian 177 3' \n", + "9 CRISPRi Tian 177 5' \n", "\n", " % targets w/ specific loss # targets w/ specific loss \\\n", - "0 11.8 28 \n", - "1 14.8 35 \n", - "2 33.3 8 \n", - "3 8.3 2 \n", + "0 13.1 31 \n", + "1 14.3 34 \n", + "2 25.0 6 \n", + "3 4.2 1 \n", "4 1.3 1 \n", - "5 1.8 38 \n", - "6 2.0 42 \n", - "7 1.1 2 \n", - "8 2.8 5 \n", + "5 2.6 2 \n", + "6 2.2 45 \n", + "7 2.2 45 \n", + "8 1.7 3 \n", + "9 2.3 4 \n", "\n", " # targets w/ loss towards telomere # targets w/ loss towards centromere \n", - "0 22 6 \n", - "1 22 13 \n", - "2 6 2 \n", - "3 1 1 \n", + "0 23 8 \n", + "1 20 14 \n", + "2 5 1 \n", + "3 1 0 \n", "4 1 0 \n", - "5 27 11 \n", - "6 14 28 \n", - "7 1 1 \n", - "8 3 2 " + "5 1 1 \n", + "6 31 14 \n", + "7 11 34 \n", + "8 3 0 \n", + "9 2 2 " ] }, "metadata": {}, @@ -479,15 +557,7 @@ "id": "9407a97b", "metadata": {}, "source": [ - "## Plot the heatmaps for the genes and cells with loss" - ] - }, - { - "cell_type": "markdown", - "id": "9d7c6669", - "metadata": {}, - "source": [ - "Let's generate the heatmaps in Figure 2E-F. Here we only " + "## Plot the heatmaps for the genes and cells with loss (Figures 2d-e in the paper)" ] }, { @@ -506,24 +576,19 @@ ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d2b58172481c408995f0c0c993485572", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/2 [00:00" + "
" ] }, "metadata": {}, @@ -537,24 +602,19 @@ ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "feb4220f2dbf47f393dc17e3d03b21ad", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/15 [00:00" + "
" ] }, "metadata": {}, @@ -564,21 +624,13 @@ "source": [ "sc.plot_loss_for_selected_genes([\"PapalexiSatija2021_eccite_RNA\", \"FrangiehIzar2021_RNA\"])" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "98d81579", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "prb", + "display_name": "prox", "language": "python", - "name": "prb" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -590,7 +642,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.11.5" } }, "nbformat": 4,