From eec32cb4f6ce57035bcf3ba2276cf9322cd38e48 Mon Sep 17 00:00:00 2001 From: js2264 Date: Wed, 19 Jun 2024 09:39:57 +0200 Subject: [PATCH] feat: add_pairdist and pair_granges functions --- DESCRIPTION | 2 + NAMESPACE | 2 + R/add_pairdist.R | 47 ++++++++++++++++++++ R/pair-granges.R | 32 ++++++++++++++ _pkgdown.yml | 4 +- man/add-pairdist.Rd | 34 +++++++++++++++ man/pair-granges.Rd | 30 +++++++++++++ tests/testthat/test-enrich.R | 84 ++++++++++++++++++++++++++++++++++++ 8 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 R/add_pairdist.R create mode 100644 R/pair-granges.R create mode 100644 man/add-pairdist.Rd create mode 100644 man/pair-granges.Rd create mode 100644 tests/testthat/test-enrich.R diff --git a/DESCRIPTION b/DESCRIPTION index 79bb463..c306651 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,6 +52,7 @@ RoxygenNote: 7.3.1 Collate: 'AllGenerics.R' 'AllClasses.R' + 'add_pairdist.R' 'anchor.R' 'annotate.R' 'arrange.R' @@ -79,6 +80,7 @@ Collate: 'methods-PinnedGInteractions.R' 'methods-show.R' 'mutate.R' + 'pair-granges.R' 'pin.R' 'plyinteractions.R' 'reexports-dplyr.R' diff --git a/NAMESPACE b/NAMESPACE index c58016d..b47c66d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -72,6 +72,7 @@ S3method(tally,GroupedGInteractions) S3method(tbl_vars,GInteractions) S3method(unanchor,AnchoredPinnedGInteractions) S3method(ungroup,GroupedGInteractions) +export(add_pairdist) export(anchor) export(anchor_3p) export(anchor_5p) @@ -110,6 +111,7 @@ export(join_overlap_left) export(join_overlap_left_directed) export(mutate) export(n_groups) +export(pair_granges) export(pin) export(pin_anchors1) export(pin_anchors2) diff --git a/R/add_pairdist.R b/R/add_pairdist.R new file mode 100644 index 0000000..5d61794 --- /dev/null +++ b/R/add_pairdist.R @@ -0,0 +1,47 @@ +#' Appends distance between interaction anchors +#' +#' Appends distance between interaction anchors, using +#' `InteractionSet::pairdist` +#' +#' @param x The query GInteractions +#' @param type A 
character string specifying the type of distance to compute. Can take values of "mid", "gap", "span", "diag" or "intra". +#' @param colname name of column to hold pair distance values +#' +#' @return The GInteractions with an additional column containing the +#' distance between each pair of anchors. +#' +#' @rdname add-pairdist +#' +#' @export +#' +#' @examples +#' gi <- read.table(text = " +#' chr1 100 200 chr1 5000 5100 bedpe_example1 30 + - +#' chr1 1000 5000 chr2 3000 3800 bedpe_example2 100 + -", +#' col.names = c( +#' "seqnames1", "start1", "end1", +#' "seqnames2", "start2", "end2", "name", "score", "strand1", "strand2") +#' ) |> as_ginteractions() +#' +#' add_pairdist(gi) +#' @export + +add_pairdist <- function(x, type = 'mid', colname = 'pairdist') { + + if (colname %in% names(GenomicRanges::mcols(x))){ + stop(paste0(colname, " already exists in destination metadata")) + } + + if (is.null(GenomicRanges::mcols(x))){ + # handle IRanges NULL adding X column of NA's + meta <- S4Vectors::DataFrame("distance" = NA_integer_) + names(meta) <- colname + GenomicRanges::mcols(x) <- meta + } else { + GenomicRanges::mcols(x)[[colname]] <- NA_integer_ + } + + GenomicRanges::mcols(x)[[colname]] <- InteractionSet::pairdist(x, type) + + x +} diff --git a/R/pair-granges.R b/R/pair-granges.R new file mode 100644 index 0000000..10722b1 --- /dev/null +++ b/R/pair-granges.R @@ -0,0 +1,32 @@ +#' Pairwise combination of a GRanges object +#' +#' Create a GInteractions object from a GRanges object, +#' containing all possible entry pairs +#' +#' @param x A GRanges object +#' +#' @return A GInteractions object +#' +#' @rdname pair-granges +#' +#' @export +#' +#' @examples +#' gr <- read.table(text = " +#' chr1 100 200 +#' chr1 5000 5100 +#' chr1 1000 5000 +#' chr2 3000 3800", +#' col.names = c( +#' "seqnames", "start", "end" +#' )) |> plyranges::as_granges() +#' +#' pair_granges(gr) +#' @export + +pair_granges <- function(x) { + + combs <- combn(length(x), 2) + 
InteractionSet::GInteractions(combs[1,], combs[2,], x) + +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 9303167..c5a5b17 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -28,6 +28,7 @@ reference: contents: - as_ginteractions - ginteractions-getters + - pair-granges - title: "`dplyr` core verbs" contents: - starts_with("dplyr-") @@ -43,9 +44,10 @@ reference: - ginteractions-count-overlaps - ginteractions-filter-overlaps - ginteractions-join-overlap-left -- title: "Annotating GInteractions" +- title: "Enriching GInteractions" contents: - ginteractions-annotate + - add-pairdist - title: "Pinning GInteractions" contents: - pin diff --git a/man/add-pairdist.Rd b/man/add-pairdist.Rd new file mode 100644 index 0000000..55e87fe --- /dev/null +++ b/man/add-pairdist.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_pairdist.R +\name{add_pairdist} +\alias{add_pairdist} +\title{Appends distance between interaction anchors} +\usage{ +add_pairdist(x, type = "mid", colname = "pairdist") +} +\arguments{ +\item{x}{The query GInteractions} + +\item{type}{A character string specifying the type of distance to compute. Can take values of "mid", "gap", "span", "diag" or "intra".} + +\item{colname}{name of column to hold pair distance values} +} +\value{ +The GInteractions with an additional column containing the +distance between each pair of anchors. 
+} +\description{ +Appends distance between interaction anchors, using +\code{InteractionSet::pairdist} +} +\examples{ +gi <- read.table(text = " +chr1 100 200 chr1 5000 5100 bedpe_example1 30 + - +chr1 1000 5000 chr2 3000 3800 bedpe_example2 100 + -", +col.names = c( + "seqnames1", "start1", "end1", + "seqnames2", "start2", "end2", "name", "score", "strand1", "strand2") +) |> as_ginteractions() + +add_pairdist(gi) +} diff --git a/man/pair-granges.Rd b/man/pair-granges.Rd new file mode 100644 index 0000000..df49367 --- /dev/null +++ b/man/pair-granges.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pair-granges.R +\name{pair_granges} +\alias{pair_granges} +\title{Pairwise combination of a GRanges object} +\usage{ +pair_granges(x) +} +\arguments{ +\item{x}{A GRanges object} +} +\value{ +A GInteractions object +} +\description{ +Create a GInteractions object from a GRanges object, +containing all possible entry pairs +} +\examples{ +gr <- read.table(text = " +chr1 100 200 +chr1 5000 5100 +chr1 1000 5000 +chr2 3000 3800", +col.names = c( + "seqnames", "start", "end" +)) |> plyranges::as_granges() + +pair_granges(gr) +} diff --git a/tests/testthat/test-enrich.R b/tests/testthat/test-enrich.R new file mode 100644 index 0000000..c7565bd --- /dev/null +++ b/tests/testthat/test-enrich.R @@ -0,0 +1,84 @@ +test_that("enrich functions work", { + + gi <- read.table(text = " + chr1 100 200 chr1 5000 5100 bedpe_example1 30 + - + chr1 1000 5000 chr2 3000 3800 bedpe_example2 100 + -", + col.names = c( + "seqnames1", "start1", "end1", + "seqnames2", "start2", "end2", "name", "score", "strand1", "strand2" + ) + ) |> as_ginteractions() + + add_pairdist(gi) |> expect_identical( + new("GInteractions", anchor1 = 1:2, anchor2 = 3:4, regions = new("GRanges", + seqnames = new("Rle", values = structure(1:2, levels = c("chr1", + "chr2"), class = "factor"), lengths = c(3L, 1L), elementMetadata = NULL, + metadata = list()), ranges = 
new("IRanges", start = c(100L, + 1000L, 5000L, 3000L), width = c(101L, 4001L, 101L, 801L), + NAMES = NULL, elementType = "ANY", elementMetadata = NULL, + metadata = list()), strand = new("Rle", values = structure(1:2, levels = c("+", + "-", "*"), class = "factor"), lengths = c(2L, 2L), elementMetadata = NULL, + metadata = list()), seqinfo = new("Seqinfo", seqnames = c("chr1", + "chr2"), seqlengths = c(NA_integer_, NA_integer_), is_circular = c(NA, + NA), genome = c(NA_character_, NA_character_)), elementMetadata = new("DFrame", + rownames = NULL, nrows = 4L, elementType = "ANY", elementMetadata = NULL, + metadata = list(), listData = structure(list(), names = character(0))), + elementType = "ANY", metadata = list()), NAMES = NULL, elementMetadata = new("DFrame", + rownames = NULL, nrows = 2L, elementType = "ANY", elementMetadata = NULL, + metadata = list(), listData = list(name = c("bedpe_example1", + "bedpe_example2"), score = c(30L, 100L), pairdist = c(4900L, + NA))), metadata = list()) + ) + + add_pairdist(gi, colname = 's') |> expect_identical( + new("GInteractions", anchor1 = 1:2, anchor2 = 3:4, regions = new("GRanges", + seqnames = new("Rle", values = structure(1:2, levels = c("chr1", + "chr2"), class = "factor"), lengths = c(3L, 1L), elementMetadata = NULL, + metadata = list()), ranges = new("IRanges", start = c(100L, + 1000L, 5000L, 3000L), width = c(101L, 4001L, 101L, 801L), + NAMES = NULL, elementType = "ANY", elementMetadata = NULL, + metadata = list()), strand = new("Rle", values = structure(1:2, levels = c("+", + "-", "*"), class = "factor"), lengths = c(2L, 2L), elementMetadata = NULL, + metadata = list()), seqinfo = new("Seqinfo", seqnames = c("chr1", + "chr2"), seqlengths = c(NA_integer_, NA_integer_), is_circular = c(NA, + NA), genome = c(NA_character_, NA_character_)), elementMetadata = new("DFrame", + rownames = NULL, nrows = 4L, elementType = "ANY", elementMetadata = NULL, + metadata = list(), listData = structure(list(), names = character(0))), 
+ elementType = "ANY", metadata = list()), NAMES = NULL, elementMetadata = new("DFrame", + rownames = NULL, nrows = 2L, elementType = "ANY", elementMetadata = NULL, + metadata = list(), listData = list(name = c("bedpe_example1", + "bedpe_example2"), score = c(30L, 100L), s = c(4900L, + NA))), metadata = list()) + ) + + gr <- read.table(text = " + chr1 100 200 + chr1 5000 5100 + chr1 1000 5000 + chr2 3000 3800", + col.names = c( + "seqnames", "start", "end" + )) |> plyranges::as_granges() + + pair_granges(gr) |> expect_identical( + new("GInteractions", anchor1 = c(1L, 1L, 1L, 3L, 3L, 2L), anchor2 = c(3L, + 2L, 4L, 2L, 4L, 4L), regions = new("GRanges", seqnames = new("Rle", + values = structure(1:2, levels = c("chr1", "chr2"), class = "factor"), + lengths = c(3L, 1L), elementMetadata = NULL, metadata = list()), + ranges = new("IRanges", start = c(100L, 1000L, 5000L, 3000L + ), width = c(101L, 4001L, 101L, 801L), NAMES = NULL, elementType = "ANY", + elementMetadata = NULL, metadata = list()), strand = new("Rle", + values = structure(3L, levels = c("+", "-", "*"), class = "factor"), + lengths = 4L, elementMetadata = NULL, metadata = list()), + seqinfo = new("Seqinfo", seqnames = c("chr1", "chr2"), seqlengths = c(NA_integer_, + NA_integer_), is_circular = c(NA, NA), genome = c(NA_character_, + NA_character_)), elementMetadata = new("DFrame", rownames = NULL, + nrows = 4L, elementType = "ANY", elementMetadata = NULL, + metadata = list(), listData = structure(list(), names = character(0))), + elementType = "ANY", metadata = list()), NAMES = NULL, elementMetadata = new("DFrame", + rownames = NULL, nrows = 6L, elementType = "ANY", elementMetadata = NULL, + metadata = list(), listData = structure(list(), names = character(0))), + metadata = list() + ) + ) +})