Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #403

Merged
merged 14 commits into from
Mar 19, 2024
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
Package: immunarch
Type: Package
Title: Bioinformatics Analysis of T-Cell and B-Cell Immune Repertoires
Version: 0.9.0
Version: 0.9.1
Authors@R: c(
person("Vadim I.", "Nazarov", , "support@immunomind.io", c("aut", "cre")),
person("Vasily O.", "Tsvetkov", , role = "aut"),
person("Siarhei", "Fiadziushchanka", , role = "aut"),
person("Eugene", "Rumynskiy", , role = "aut"),
person("Aleksandr A.", "Popov", , role = "aut"),
person("Ivan", "Balashov", , role = "aut"),
Expand All @@ -23,7 +24,7 @@ Description: A comprehensive framework for bioinformatics exploratory analysis o
and gene segments, repertoire diversity analysis, annotation of clonotypes using external immune receptor
databases and clonotype tracking in vaccination and cancer studies. A successor to our
previously published 'tcR' immunoinformatics package (Nazarov 2015) <doi:10.1186/s12859-015-0613-1>.
License: AGPL-3
License: Apache License (== 2.0)
URL: https://immunarch.com/, https://github.com/immunomind/immunarch
BugReports: https://github.com/immunomind/immunarch/issues
Imports:
Expand Down Expand Up @@ -84,6 +85,6 @@ Suggests:
rmarkdown
VignetteBuilder: knitr
Encoding: UTF-8
RoxygenNote: 7.2.2
RoxygenNote: 7.3.1
LazyData: true
LazyDataCompression: xz
862 changes: 201 additions & 661 deletions LICENSE

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Generated by roxygen2: do not edit by hand

S3method(cosine_sim,default)
S3method(cosine_sim,numeric)
S3method(jaccard_index,character)
S3method(jaccard_index,default)
S3method(overlap_coef,character)
S3method(overlap_coef,default)
S3method(tversky_index,character)
S3method(tversky_index,default)
S3method(vis,clonal_family)
S3method(vis,clonal_family_tree)
S3method(vis,immunr_chao1)
Expand Down Expand Up @@ -150,6 +158,7 @@ importFrom(dplyr,n)
importFrom(dplyr,one_of)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,row_number)
importFrom(dplyr,rowwise)
importFrom(dplyr,select)
importFrom(dplyr,select_)
Expand Down
5 changes: 3 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

fill_vec <- function(read_vec, read_indices) {
  # Thin R-side wrapper: forwards both arguments unchanged to the compiled
  # `_immunarch_fill_vec` routine and returns whatever that routine returns.
  # The native routine is invoked exactly once per call.
  .Call(`_immunarch_fill_vec`, read_vec, read_indices)
}

fill_reads <- function(new_reads, new_counts) {
  # Thin R-side wrapper: forwards both arguments unchanged to the compiled
  # `_immunarch_fill_reads` routine and returns whatever that routine returns.
  # The native routine is invoked exactly once per call.
  .Call(`_immunarch_fill_reads`, new_reads, new_counts)
}

2 changes: 0 additions & 2 deletions R/explore.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,5 +145,3 @@ repExplore <- function(.data, .method = c("volume", "count", "len", "clones"), .

res
}

rep.ex <- repExplore
25 changes: 25 additions & 0 deletions R/immunarch-remaster.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# .check_immundata <- function (.object) {
# if (!is.instance(.object, "ImmunData")) {
# stop("Error: the input object is not of class ImmunData. Immunarch works on ImmunData only. Helpful manual: ...")
# }
# }
#
# .repertoire_overlap <- function (.data, .method, .verbose = TRUE, ...) {
# check_immundata(.data)
# }
#
# .gene_usage <- function (.data, .gene, .type, .use_counts, .norm, .gene_vec) {
#
# }
#
# .repertoire_diversity <- function (.data, .method, .verbose = TRUE, ...) {
#
# }
#
# .track_clonotypes <- function () {
#
# }
#
# .public_repertoire <- function () {
#
# }
26 changes: 19 additions & 7 deletions R/io-parsers.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,10 @@ parse_repertoire <- function(.filename, .mode, .nuc.seq, .aa.seq, .count,
.vend, .dstart, .dend, .jstart,
.total.insertions, .vd.insertions, .dj.insertions
)
if (!is.na(.add[1])) {
if (!has_no_data(.add)) {
vec_names <- c(vec_names, .add)
# add missing columns
df %<>% add_empty_columns(.add[!(.add %in% colnames(df))])
}

df <- df[, vec_names]
Expand Down Expand Up @@ -400,7 +402,7 @@ parse_mitcr <- function(.filename, .mode) {
}

parse_mixcr <- function(.filename, .mode, .count = c("clonecount", "readcount")) {
.filename <- .filename
.filename %<>% .as_tsv()
.id <- "cloneid"
.count %<>% tolower()
.sep <- "\t"
Expand Down Expand Up @@ -727,6 +729,11 @@ parse_mixcr <- function(.filename, .mode, .count = c("clonecount", "readcount"))
}
}

# fill the cloneid column if it does not exist
if (!(.id %in% colnames(df))) {
df %<>% mutate("{.id}" := row_number())
}

df <- df[, make.names(df_columns)]
colnames(df) <- df_column_names

Expand Down Expand Up @@ -962,13 +969,18 @@ parse_airr <- function(.filename, .mode) {
.as_tsv() %>%
airr::read_rearrangement()

bcr_pipeline_columns <- c(
"cdr1", "cdr2", "cdr1_aa", "cdr2_aa", "fwr1", "fwr2", "fwr3", "fwr4",
"fwr1_aa", "fwr2_aa", "fwr3_aa", "fwr4_aa"
)
df %<>%
select_(
add_empty_columns(bcr_pipeline_columns[!(bcr_pipeline_columns %in% colnames(df))]) %>%
select(
"sequence", "v_call", "d_call", "j_call", "junction", "junction_aa",
~contains("v_germline_end"), ~contains("d_germline_start"),
~contains("d_germline_end"), ~contains("j_germline_start"),
~contains("np1_length"), ~contains("np2_length"),
~contains("duplicate_count"),
contains("v_germline_end"), contains("d_germline_start"),
contains("d_germline_end"), contains("j_germline_start"),
contains("np1_length"), contains("np2_length"),
contains("duplicate_count"),
"cdr1", "cdr2", "cdr1_aa", "cdr2_aa", "fwr1", "fwr2", "fwr3", "fwr4",
"fwr1_aa", "fwr2_aa", "fwr3_aa", "fwr4_aa"
)
Expand Down
7 changes: 5 additions & 2 deletions R/io-utility.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
.remove.ext <- function(.str) {
  # Reduce file paths to bare file names without known extensions.
  # Vectorised over `.str`; returns a character vector of the same length.
  #
  # Applied to every element, in this order:
  #   1. drop everything up to and including the last "/";
  #   2. drop everything up to and including the last "\" (Windows paths);
  #   3. drop one trailing compression suffix: .gz, .bzip, .bzip2 or .bz2;
  #   4. drop one trailing table suffix: .txt, .tsv or .csv.
  # The compression suffix must go first so that "x.txt.gz" becomes "x".
  # Any other dots are kept, e.g. "s1.clonotypes.txt" -> "s1.clonotypes".
  .str <- sub(".*/", "", .str)
  .str <- sub(".*\\\\", "", .str)
  .str <- sub("(\\.gz|\\.bzip|\\.bzip2|\\.bz2)$", "", .str)
  sub("(\\.txt|\\.tsv|\\.csv)$", "", .str)
}


Expand Down
6 changes: 3 additions & 3 deletions R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ if (getRversion() >= "2.15.1") {
#' @importFrom jsonlite read_json
#' @importFrom stringr str_split str_detect str_replace_all str_trim
#' @importFrom methods as
#' @importFrom dplyr contains first select_ group_by_at one_of
#' @importFrom dplyr contains first select_ group_by_at one_of row_number
#' @importFrom utils read.table
#' @importFrom data.table setDF
#'
Expand Down Expand Up @@ -291,13 +291,13 @@ repLoad <- function(.path, .mode = "paired", .coding = TRUE, ...) {
missed_in_metadata <- setdiff(.metadata$Sample, .rep_names)
if (length(missed_in_folders) || length(missed_in_metadata)) {
if (length(missed_in_metadata)) {
message(" -- Samples found in the metadata, but not in the folder:\n ", missed_in_metadata)
message(" -- Samples found in the metadata, but not in the folder:\n ", toString(missed_in_metadata))
message(" Did you correctly specify all the sample names in the metadata file?")

error_flag <- TRUE
}
if (length(missed_in_folders)) {
message(" -- Samples found in the folder, but not in the metadata:\n ", missed_in_folders)
message(" -- Samples found in the folder, but not in the metadata:\n ", toString(missed_in_folders))
message(" Did you add all the necessary samples to the metadata file with correct names?")
message(" Creating dummy sample records in the metadata for now...")

Expand Down
8 changes: 8 additions & 0 deletions R/overlap.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,14 @@ overlap_coef <- function(.x, .y) {
UseMethod("overlap_coef")
}

#' @export
overlap_coef.default <- function(.x, .y) {
  # Overlap coefficient for table-like inputs: the number of rows shared by
  # both inputs divided by the row count of the smaller input.

  # Run collect() on both inputs with n = Inf before comparing them.
  first <- collect(.x, n = Inf)
  second <- collect(.y, n = Inf)

  n_shared <- nrow(dplyr::intersect(first, second))
  n_smaller <- min(nrow(first), nrow(second))
  n_shared / n_smaller
}

#' @export
overlap_coef.character <- function(.x, .y) {
  # Overlap coefficient for character vectors: the number of shared elements
  # divided by the length of the shorter vector.
  shared <- dplyr::intersect(.x, .y)
  shorter_len <- min(length(.x), length(.y))
  length(shared) / shorter_len
}
Expand All @@ -211,13 +213,15 @@ jaccard_index <- function(.x, .y) {
UseMethod("jaccard_index")
}

#' @export
jaccard_index.default <- function(.x, .y) {
  # Jaccard index for table-like inputs: shared row count divided by
  # (rows in .x + rows in .y - shared row count).

  # Run collect() on both inputs with n = Inf before comparing them.
  lhs <- collect(.x, n = Inf)
  rhs <- collect(.y, n = Inf)

  n_common <- nrow(dplyr::intersect(lhs, rhs))
  n_combined <- nrow(lhs) + nrow(rhs) - n_common
  n_common / n_combined
}

#' @export
jaccard_index.character <- function(.x, .y) {
intersection <- length(dplyr::intersect(.x, .y))
intersection / (length(.x) + length(.y) - intersection)
Expand All @@ -227,13 +231,15 @@ tversky_index <- function(.x, .y, .a = .5, .b = .5) {
UseMethod("tversky_index")
}

#' @export
tversky_index.default <- function(.x, .y, .a = .5, .b = .5) {
  # Tversky index for table-like inputs:
  #   shared / (.a * only_in_x + .b * only_in_y + shared)
  # where each term is a row count; .a and .b weight the rows found in only
  # one of the two inputs.

  # Run collect() on both inputs with n = Inf before comparing them.
  lhs <- collect(.x, n = Inf)
  rhs <- collect(.y, n = Inf)

  n_common <- nrow(dplyr::intersect(lhs, rhs))
  n_only_lhs <- nrow(dplyr::setdiff(lhs, rhs))
  n_only_rhs <- nrow(dplyr::setdiff(rhs, lhs))

  n_common / (.a * n_only_lhs + .b * n_only_rhs + n_common)
}

#' @export
tversky_index.character <- function(.x, .y, .a = .5, .b = .5) {
intersection <- length(dplyr::intersect(.x, .y))
intersection / (.a * length(dplyr::setdiff(.x, .y)) + .b * length(dplyr::setdiff(.y, .x)) + intersection)
Expand All @@ -243,6 +249,7 @@ cosine_sim <- function(.x, .y, .quant) {
UseMethod("cosine_sim")
}

#' @export
cosine_sim.default <- function(.x, .y, .quant) {
.x <- collect(.x, n = Inf)
.y <- collect(.y, n = Inf)
Expand All @@ -258,6 +265,7 @@ cosine_sim.default <- function(.x, .y, .quant) {
sum(first_col * second_col) / (sqrt(sum(first_col * first_col)) * sqrt(sum(second_col * second_col)))
}

#' @export
cosine_sim.numeric <- function(.x, .y, .quant) {
df <- rbind(.x, .y)
sum(.x * .y) / (sqrt(rowSums(df^2))[1] * sqrt(rowSums(df^2))[2])[[1]]
Expand Down
2 changes: 1 addition & 1 deletion R/sampling.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#'
#' Note: each connection must represent a separate repertoire.
#'
#' @param .method Character. Name of a sampling method. See "Description" for more details. Default value is "downsample"
#' @param .method Character. Name of a sampling method. See "Details" for more details. Default value is "downsample"
#' that downsamples the repertoires to the number of clones (i.e., reads / UMIs) that the smallest repertoire has, if user
#' doesn't set any value to the ".n" argument.
#'
Expand Down
9 changes: 5 additions & 4 deletions R/seqCluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,11 @@ seqCluster <- function(.data, .dist, .perc_similarity, .nt_similarity, .fixed_th
if (!all(is.na(grouping_cols))) {
result_multi %<>% map2_df(., pmap(group_values, data.frame)[!singleseq_flag], ~ cbind(.x, .y))
res <- rbind(result_single, result_multi)
res[grouping_cols] <- str_split(str_split(res[["Cluster"]],
pattern = "_", simplify = TRUE
)[, 1],
pattern = "/", simplify = TRUE
res[grouping_cols] <- str_split(
str_split(res[["Cluster"]],
pattern = "_", simplify = TRUE
)[, 1],
pattern = "/", simplify = TRUE
)[, seq_along(grouping_cols)]
} else {
result_multi %<>% map_df(., ~.x)
Expand Down
1 change: 0 additions & 1 deletion R/shiny.R
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ fixVis <- function(.plot = NA) {
#
server <- function(input, output, session) {
create_plot <- function(input) {

# TODO: make automatic detection of available themes from ggplot2 and other packages
choose_theme <- function(theme_label) {
switch(theme_label,
Expand Down
11 changes: 11 additions & 0 deletions R/tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,17 @@ add_column_with_first_gene <- function(.data, .original_colname, .target_colname
return(.data)
}

# add columns filled with NA
add_empty_columns <- function(.data, .colnames) {
  # Append one all-NA column to `.data` for every name in `.colnames`.
  #
  # .data     - table to extend (callers pass a data frame).
  # .colnames - character vector with the names of the columns to add; when it
  #             is empty, `.data` is returned untouched.
  #
  # Returns the result of cbind()-ing `.data` with the new columns, which are
  # placed after the existing ones in the order given by `.colnames`.
  if (length(.colnames) == 0) {
    return(.data)
  }

  # Each placeholder is sized to the row count of `.data` instead of relying
  # on recycling a single NA: recycling fails for a zero-row table, because
  # a length-1 column cannot be combined with zero-length columns.
  placeholder <- rep(NA, NROW(.data))
  new_columns <- rep(list(placeholder), length(.colnames))
  names(new_columns) <- .colnames
  do.call(cbind, c(list(.data), new_columns))
}

# used to add sample name to error/warning messages when sample name is available
optional_sample <- function(prefix, sample_name, suffix) {
if (is.na(sample_name) || (sample_name == "")) {
Expand Down
18 changes: 5 additions & 13 deletions R/vis.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ if (getRversion() >= "2.15.1") {
"Overlap", "head", "Mean", "MeanVal", "MinVal", "MaxVal",
"Q1", "Q2", "Type", "Length", "Gene", "Freq", "Sequence",
"AA", "Clones", "Source.gr", "Target.gr", "Samples", "Samples.y",
"CDR3.aa", "p.adj", "group1", "group2", "y.coord", "..p.adj..", ".SD",
"CDR3.aa", "p.adj", "group1", "group2", "y.coord", ".SD",
"name", "label", "."
))
}
Expand Down Expand Up @@ -47,15 +47,11 @@ if (getRversion() >= "2.15.1") {


.tweak_fill <- function(.n) {
palette_name <- ""
if (.n == 1) {
palette_name <- "Set2"
} else if (.n == 2) {
palette_name <- "Set1"
}
# else if (.n < 4) { palette_name = "YlGnBu" }
# else if (.n < 6) { palette_name = "RdBu" }
else if (.n < 12) {
} else if (.n < 12) {
palette_name <- "Spectral"
} else {
return(scale_fill_hue())
Expand All @@ -65,15 +61,11 @@ if (getRversion() >= "2.15.1") {
}

.tweak_col <- function(.n) {
palette_name <- ""
if (.n == 1) {
palette_name <- "Set2"
} else if (.n == 2) {
palette_name <- "Set1"
}
# else if (.n < 4) { palette_name = "YlGnBu" }
# else if (.n < 6) { palette_name = "RdBu" }
else if (.n < 12) {
} else if (.n < 12) {
palette_name <- "Spectral"
} else {
return(scale_colour_hue())
Expand Down Expand Up @@ -1469,7 +1461,7 @@ vis_box <- function(.data, .by = NA, .meta = NA, .melt = TRUE,
# print(p_df)

p <- p +
stat_compare_means(aes(label = ..p.adj..),
stat_compare_means(aes(label = after_stat(p.adj)),
bracket.size = .5, size = .signif.label.size,
label.y = max(.data$Value, na.rm = TRUE) * 1.07
)
Expand Down Expand Up @@ -2188,7 +2180,7 @@ vis_bar <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .
# print(p_df)

p <- p +
stat_compare_means(aes(label = ..p.adj..),
stat_compare_means(aes(label = after_stat(p.adj)),
bracket.size = .5, size = .signif.label.size,
label.y = max(.data$Value, na.rm = TRUE) * 1.07
)
Expand Down
Loading
Loading