From 75831be6e703af3d79f8a5d62ea427e69798d4da Mon Sep 17 00:00:00 2001 From: Kate Isaac <41767733+kweav@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:44:03 -0400 Subject: [PATCH] add beginning of GI review --- inst/rmd/scratch_gimap_GI_review.Rmd | 77 ++++ inst/rmd/scratch_gimap_GI_review.html | 508 ++++++++++++++++++++++++++ 2 files changed, 585 insertions(+) create mode 100644 inst/rmd/scratch_gimap_GI_review.Rmd create mode 100644 inst/rmd/scratch_gimap_GI_review.html diff --git a/inst/rmd/scratch_gimap_GI_review.Rmd b/inst/rmd/scratch_gimap_GI_review.Rmd new file mode 100644 index 0000000..fe54cb6 --- /dev/null +++ b/inst/rmd/scratch_gimap_GI_review.Rmd @@ -0,0 +1,77 @@ +--- +title: "scratch gimap Genetic Interaction Review" +output: html_document +date: "`r Sys.Date()`" +--- + +```{r warning=FALSE, message=FALSE} +library(tidyverse) +library(devtools) + +devtools::load_all() +``` + +## Get the gimap GI results + +```{r} + gimap_dataset <- get_example_data("gimap") %>% + gimap_filter() %>% + gimap_annotate() %>% + gimap_normalize( + timepoints = "day", + replicates = "rep") %>% + calc_crispr() %>% + calc_gi() +``` + +## Load the GI Mapping GI results + +```{r} +old_gi_results <- readRDS("~/Downloads/results/calculate_GI_scores/tables/rds/d.HeLa_GI_scores_target") +``` + +## Compare results + +### Columns + +```{r} +colnames(gimap_dataset$gi_scores) +colnames(old_gi_results) +``` + +Looks like we want to compare the pgRNA targets which should be the `pgRNA_target_double` column for the gimap results and the `pgRNA_target` column for the GI Mapping results + +Columns in the GI Mapping results not in the gimap results seem to include model results (`intercept`, `slope`) as well as some significance values (`p_val`, `fdr`), observed and expected CRISPR score (`mean_observed_CS` and `mean_expected_CS` (which I think I need for plotting)). And how does `mean_GI_score` relate to the three GI score columns in the gimap results? + +### Target overlap + +```{r} +length(unique(old_gi_results$pgRNA_target)) +length(unique(gimap_dataset$gi_scores$pgRNA_target_double)) +length(intersect(unique(old_gi_results$pgRNA_target), unique(gimap_dataset$gi_scores$pgRNA_target_double))) +``` + +Only 1/3 of the GI Mapping targets are represented in the gimap results + +### Number of observations/rows total and rep column overlap + +```{r} +nrow(old_gi_results) +nrow(gimap_dataset$gi_scores) +``` + +The gimap results has far more rows + +```{r} +table(old_gi_results$rep) + +table(gimap_dataset$gi_scores$rep) +``` + +While the replicate values themselves can be synced like we've done in other reviews, this further shows the far greater number of observations (even if there are fewer represented pgRNA targets) in the gimap results + +```{r} +head(old_gi_results) + +``` + diff --git a/inst/rmd/scratch_gimap_GI_review.html b/inst/rmd/scratch_gimap_GI_review.html new file mode 100644 index 0000000..09c979b --- /dev/null +++ b/inst/rmd/scratch_gimap_GI_review.html @@ -0,0 +1,508 @@ + + + + + + + + + + + + + + +scratch gimap Genetic Interaction Review + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
library(tidyverse)
+library(devtools)
+
+devtools::load_all()
+
+

Get the gimap GI results

+
 gimap_dataset <- get_example_data("gimap") %>%
+   gimap_filter() %>%
+   gimap_annotate() %>%
+   gimap_normalize(
+     timepoints = "day",
+     replicates = "rep") %>%
+  calc_crispr() %>%
+  calc_gi()
+
## Annotating Data
+
## Rows: 1884 Columns: 3
+## ── Column specification ────────────────────────────────────────────────────────
+## Delimiter: ","
+## chr (3): gene, gene_symbol, entrez_id
+## 
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+## Normalizing Log Fold Change
+## 
+## Calculating CRISPR score
+## 
+## Calculating Genetic Interaction scores
+
+
+

Load the GI Mapping GI results

+
old_gi_results <- readRDS("~/Downloads/results/calculate_GI_scores/tables/rds/d.HeLa_GI_scores_target")
+
+
+

Compare results

+
+

Columns

+
colnames(gimap_dataset$gi_scores)
+
## [1] "pgRNA_target_double"      "rep"                     
+## [3] "double_target_gi_score"   "single_target_gi_score_1"
+## [5] "single_target_gi_score_2"
+
colnames(old_gi_results)
+
##  [1] "rep"               "paralog_pair"      "target_type"      
+##  [4] "pgRNA_target"      "gene1_symbol"      "gene2_symbol"     
+##  [7] "intercept"         "slope"             "broad_target_type"
+## [10] "p_val"             "fdr"               "mean_observed_CS" 
+## [13] "mean_expected_CS"  "mean_GI_score"
+

Looks like we want to compare the pgRNA targets which should be the +pgRNA_target_double column for the gimap results and the +pgRNA_target column for the GI Mapping results

+

Columns in the GI Mapping results not in the gimap results seem to +include model results (intercept, slope) as +well as some significance values (p_val, fdr), +observed and expected CRISPR score (mean_observed_CS and +mean_expected_CS (which I think I need for plotting)). And +how does mean_GI_score relate to the three GI score columns +in the gimap results?

+
+
+

Target overlap

+
length(unique(old_gi_results$pgRNA_target))
+
## [1] 3086
+
length(unique(gimap_dataset$gi_scores$pgRNA_target_double))
+
## [1] 1031
+
length(intersect(unique(old_gi_results$pgRNA_target), unique(gimap_dataset$gi_scores$pgRNA_target_double)))
+
## [1] 1030
+

Only 1/3 of the GI Mapping targets are represented in the gimap +results

+
+
+

Number of observations/rows total and rep column overlap

+
nrow(old_gi_results)
+
## [1] 9258
+
nrow(gimap_dataset$gi_scores)
+
## [1] 178267
+

The gimap results has far more rows

+
table(old_gi_results$rep)
+
## 
+##    A    B    C 
+## 3086 3086 3086
+
table(gimap_dataset$gi_scores$rep)
+
## 
+## late_RepA late_RepB late_RepC 
+##     59423     59442     59402
+

While the replicate values themselves can be synced like we’ve done +in other reviews, this further shows the far greater number of +observations (even if there are fewer represented pgRNA targets) in the +gimap results

+
head(old_gi_results)
+
## # A tibble: 6 × 14
+##   rep   paralog_pair    target_type pgRNA_target    gene1_symbol gene2_symbol
+##   <chr> <chr>           <chr>       <chr>           <chr>        <chr>       
+## 1 A     AADAC_AADACL2   gene_gene   AADAC_AADACL2   AADAC        AADACL2     
+## 2 A     AADACL3_AADACL4 gene_gene   AADACL3_AADACL4 AADACL3      AADACL4     
+## 3 A     ABCC1_ABCC3     gene_gene   ABCC1_ABCC3     ABCC1        ABCC3       
+## 4 A     ABCC8_ABCC9     gene_gene   ABCC8_ABCC9     ABCC8        ABCC9       
+## 5 A     ABCD1_ABCD2     gene_gene   ABCD1_ABCD2     ABCD1        ABCD2       
+## 6 A     ABCG1_ABCG4     gene_gene   ABCG1_ABCG4     ABCG1        ABCG4       
+## # ℹ 8 more variables: intercept <dbl>, slope <dbl>, broad_target_type <chr>,
+## #   p_val <dbl>, fdr <dbl>, mean_observed_CS <dbl>, mean_expected_CS <dbl>,
+## #   mean_GI_score <dbl>
+
+
+ + + + +
+ + + + + + + + + + + + + + +