diff --git a/_freeze/chapters/chapter_05/execute-results/html.json b/_freeze/chapters/chapter_05/execute-results/html.json
index 51a22fd..759bcc3 100644
--- a/_freeze/chapters/chapter_05/execute-results/html.json
+++ b/_freeze/chapters/chapter_05/execute-results/html.json
@@ -1,8 +1,8 @@
 {
-  "hash": "66bb0a8b8cb5317fd5bed61c9c05612f",
+  "hash": "078e4ba8906e32b3de602a2ec43e1a3c",
   "result": {
     "engine": "knitr",
-    "markdown": "---\nexecute: \n  echo: true\n  eval: false\n  warning: false\n---\n\n\n# Runtime benchmark\n\nHere, we will perform a runtime benchmark for functions related to duplicate \nclassification and substitution rates calculation using model organisms.\n\nTo start, let's load the required data and packages.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nset.seed(123) # for reproducibility\n\n# Load required packages\nlibrary(doubletrouble)\nlibrary(here)\nlibrary(tidyverse)\nlibrary(patchwork)\n\nsource(here(\"code\", \"utils.R\"))\n\n# Load sample metadata for Ensembl instances\nload(here(\"products\", \"result_files\", \"metadata_all.rda\"))\n```\n:::\n\n::: {.cell}\n\n:::\n\n\n## Benchmark 1: `classify_gene_pairs()`\n\nHere, we will benchmark the performance of `classify_gene_pairs()`\nwith model organisms.\n\nFirst, let's get the genome and annotation data.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create a data frame with names of model species and their Ensembl instances\nmodel_species <- data.frame(\n    species = c(\n        \"arabidopsis_thaliana\", \"caenorhabditis_elegans\", \n        \"homo_sapiens\", \"saccharomyces_cerevisiae\",\n        \"drosophila_melanogaster\", \"danio_rerio\"\n    ),\n    instance = c(\n        \"plants\", \"metazoa\", \"ensembl\", \"fungi\", \"metazoa\", \"ensembl\"\n    )\n)\n\n# For each organism, download data, and identify and classify duplicates\nmodel_duplicates <- lapply(seq_len(nrow(model_species)), function(x) {\n    \n    species <- model_species$species[x]\n    instance <- model_species$instance[x]\n    \n    # Get annotation\n    annot <- get_annotation(model_species[x, ], instance)\n    \n    # Get proteome and keep only primary transcripts\n    seq <- get_proteomes(model_species[x, ], instance)\n    seq <- filter_sequences(seq, annot)\n    \n    # Process data\n    pdata <- syntenet::process_input(seq, annot, filter_annotation = TRUE)\n    \n    # Perform DIAMOND search\n    outdir <- file.path(tempdir(), paste0(species, \"_intra\"))\n    diamond <- syntenet::run_diamond(\n        seq = pdata$seq,\n        compare = \"intraspecies\", \n        outdir = outdir,\n        threads = 4,\n        ... = \"--sensitive\"\n    )\n    \n    fs::dir_delete(outdir)\n    \n    # Classify duplicates - standard mode\n    start <- Sys.time()\n    duplicate_pairs <- classify_gene_pairs(\n            blast_list = diamond,\n            annotation = pdata$annotation,\n            scheme = \"standard\"\n        )[[1]]\n    end <- Sys.time()\n    runtime <- end - start\n    \n    return(runtime)\n})\nnames(model_duplicates) <- gsub(\"_\", \" \", str_to_title(model_species$species))\n\n# Summarize results in a table\nbenchmark_classification <- data.frame(\n    species = names(model_duplicates),\n    time_seconds = as.numeric(unlist(model_duplicates))\n)\n\n# Save results\nsave(\n    benchmark_classification, compress = \"xz\",\n    file = here(\"products\", \"result_files\", \"benchmark_classification.rda\")\n)\n```\n:::\n\n\n## Benchmark 2: `pairs2kaks()`\n\nNext, we will benchmark the performance of `pairs2kaks()` for\nduplicate pairs in the *Saccharomyces cerevisiae* genome.\nWe will do it using a single thread, and using parallelization (with\n4 and 8 threads).\n\nFirst of all, let's get the required data for `pairs2kaks()`.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Load duplicate pairs for S. cerevisiae\nload(here(\"products\", \"result_files\", \"fungi_duplicates.rda\"))\nscerevisiae_pairs <- fungi_duplicates[\"saccharomyces_cerevisiae\"]\n\n# Get CDS for S. cerevisiae\nscerevisiae_cds <- get_cds_ensembl(\"saccharomyces_cerevisiae\", \"fungi\")\n```\n:::\n\n\nNow, we can do the benchmark. \n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Parallel back-end: SerialParam (1 thread)\nstart <- Sys.time()\nkaks <- pairs2kaks(\n    scerevisiae_pairs, \n    scerevisiae_cds,\n    bp_param = BiocParallel::SerialParam()\n)\nend <- Sys.time()\nruntime_serial <- end - start\n\n# Parallel back-end: SnowParam, 4 threads\nstart <- Sys.time()\nkaks <- pairs2kaks(\n    scerevisiae_pairs, \n    scerevisiae_cds,\n    bp_param = BiocParallel::SnowParam(workers = 4)\n)\nend <- Sys.time()\nruntime_snow4 <- end - start\n\n# Parallel back-end: SnowParam, 8 threads\nstart <- Sys.time()\nkaks <- pairs2kaks(\n    scerevisiae_pairs, \n    scerevisiae_cds,\n    bp_param = BiocParallel::SnowParam(workers = 8)\n)\nend <- Sys.time()\nruntime_snow8 <- end - start\n\n# Summarize results in a table\nbenchmark_kaks <- data.frame(\n    `Back-end` = c(\"Serial\", \"Snow, 4 threads\", \"Snow, 8 threads\"),\n    Time_minutes = as.numeric(c(runtime_serial, runtime_snow4, runtime_snow8))\n) |>\n    dplyr::mutate(\n        Pairs_per_minute = nrow(scerevisiae_pairs[[1]]) / Time_minutes,\n        Pairs_per_second = nrow(scerevisiae_pairs[[1]]) / (Time_minutes * 60)\n    )\n\nsave(\n    benchmark_kaks, compress = \"xz\",\n    file = here(\"products\", \"result_files\", \"benchmark_kaks.rda\")\n)\n```\n:::\n\n\n## Benchmark 3: **doubletrouble** vs DupGen_finder\n\nHere, we will classify duplicate pairs in the *A. thaliana* genome \nusing __doubletrouble__ and DupGen_finder to assess if they produce the same \nresults, and compare their runtimes. \n\nFirst, let's get all data we need (proteomes, annotation, and DIAMOND tables).\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Get annotation\nsmeta <- data.frame(\n    species = c(\"arabidopsis_thaliana\", \"amborella_trichopoda\"),\n    instance = \"plants\"\n)\nannot <- get_annotation(smeta, \"plants\")\n\n# Get proteome and keep only primary transcripts\nseq <- get_proteomes(smeta, \"plants\")\nseq <- filter_sequences(seq, annot)\n    \n# Process data\npdata <- syntenet::process_input(seq, annot, filter_annotation = TRUE)\n    \n# Run intraspecies DIAMOND search\noutdir <- file.path(tempdir(), \"benchmark3_intra\")\ndiamond_intra <- syntenet::run_diamond(\n    seq = pdata$seq,\n    compare = \"intraspecies\", \n    outdir = outdir,\n    threads = 4,\n    ... = \"--sensitive\"\n)\nfs::dir_delete(outdir)\n\n# Run interspecies DIAMOND search\noutdir2 <- file.path(tempdir(), \"benchmark3_inter\")\ncompare_df <- data.frame(\n    query = \"arabidopsis.thaliana\", target = \"amborella.trichopoda\"\n)\n\ndiamond_inter <- syntenet::run_diamond(\n    seq = pdata$seq,\n    compare = compare_df, \n    outdir = outdir2,\n    threads = 4,\n    ... = \"--sensitive\"\n)\nfs::dir_delete(outdir2)\n```\n:::\n\n\nNow, let's classify duplicates with __doubletrouble__.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Classify duplicates with doubletrouble\nstart1 <- Sys.time()\ndups1 <- classify_gene_pairs(\n    blast_list = diamond_intra[2],\n    annotation = pdata$annotation,\n    blast_inter = diamond_inter,\n    scheme = \"extended\",\n    collinearity_dir = here(\"products\")\n)\nend1 <- Sys.time()\nruntime1 <- end1 - start1\n```\n:::\n\n\nNext, we will classify duplicates with DupGen_finder. For that, we will first\nexport input data in the required format.\n\n\n::: {.cell}\n\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\n# Export data\n## .blast files\nb1 <- diamond_intra$arabidopsis.thaliana_arabidopsis.thaliana |>\n    mutate(\n        query = str_replace_all(query, \"^ara_\", \"\"),\n        db = str_replace_all(db, \"^ara_\", \"\")\n    )\n\nb2 <- diamond_inter$arabidopsis.thaliana_amborella.trichopoda |>\n    mutate(\n        query = str_replace_all(query, \"^ara_\", \"\"),\n        db = str_replace_all(db, \"^amb_\", \"\")\n    )\n\nwrite_tsv(b1, file = file.path(tempdir(), \"Ath.blast\"), col_names = FALSE)\nwrite_tsv(b2, file = file.path(tempdir(), \"Ath_Atr.blast\"), col_names = FALSE)\n\n## .gff files\ngff1 <- pdata$annotation$arabidopsis.thaliana |>\n    as.data.frame() |>\n    mutate(gene = str_replace_all(gene, \"^ara_\", \"\")) |>\n    mutate(seqnames = str_replace_all(seqnames, \"ara_\", \"ara-\")) |>\n    dplyr::select(seqnames, gene, start, end)\n\ngff2 <- pdata$annotation$amborella.trichopoda |>\n    as.data.frame() |>\n    mutate(gene = str_replace_all(gene, \"^amb_\", \"\")) |>\n    mutate(seqnames = str_replace_all(seqnames, \"^amb_\", \"amb-\")) |>\n    dplyr::select(seqnames, gene, start, end)\n\ngff2 <- bind_rows(gff1, gff2)\n\nwrite_tsv(gff1, file = file.path(tempdir(), \"Ath.gff\"), col_names = FALSE)\nwrite_tsv(gff2, file = file.path(tempdir(), \"Ath_Atr.gff\"), col_names = FALSE)\n\n# Classify duplicates with DupGen_finder\nargs = c(\n    \"-i\", tempdir(), \"-t Ath -c Atr\",\n    \"-o\", file.path(tempdir(), \"results\"),\n    \"-e 1e-10\"\n)\n\nstart2 <- Sys.time()\nsystem2(\"DupGen_finder.pl\", args = args)\nend2 <- Sys.time()\nruntime2 <- end2 - start2\n```\n:::\n\n\nNow, let's compare both algorithms in terms of runtime and results.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Load `DupGen_finder.pl` results\nfiles <- c(\n    \"Ath.wgd.pairs\", \"Ath.tandem.pairs\", \"Ath.proximal.pairs\",\n    \"Ath.transposed.pairs\", \"Ath.dispersed.pairs\"\n)\nfiles <- file.path(tempdir(), \"results\", files)\nnames(files) <- c(\"SD\", \"TD\", \"PD\", \"TRD\", \"DD\")\ndups2 <- Reduce(rbind, lapply(seq_along(files), function(x) {\n    d <- read_tsv(files[x], show_col_types = FALSE) |>\n        mutate(type = names(files)[x]) |>\n        select(1, 3, type) |>\n        as.data.frame()\n    \n    names(d)[c(1,2)] <- c(\"dup1\", \"dup2\")\n    \n    return(d)\n}))\n\n# Compare runtime\ncomp_runtime <- data.frame(doubletrouble = runtime1, DupGen_finder = runtime2)\n\n# Compare number of gene pairs per category\ncomp_results <- inner_join(\n    count(dups1$arabidopsis.thaliana, type) |> \n        dplyr::rename(n_doubletrouble = n),\n    count(dups2, type) |>\n        dplyr::rename(n_DupGen_finder = n),\n    by = \"type\"\n)\ncomp_results <- bind_rows(\n    comp_results, \n    data.frame(\n        type = \"Total\", \n        n_doubletrouble = sum(comp_results$n_doubletrouble),\n        n_DupGen_finder = sum(comp_results$n_DupGen_finder)\n    )\n)\n```\n:::\n\n::: {.cell}\n\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nlist(runtime = comp_runtime, results = comp_results)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n$runtime\n  doubletrouble DupGen_finder\n1  3.71474 secs 3.373682 secs\n\n$results\n   type n_doubletrouble n_DupGen_finder\n1    SD            4329            4355\n2    TD            2075            2131\n3    PD            2789             896\n4   TRD            6703            5941\n5    DD           31589           17834\n6 Total           47485           31157\n```\n\n\n:::\n:::\n\n\nOverall, results are similar, but there are important differences. In terms\nof runtime, there is no significant difference (this is the runtime of a single\nrun, so there's some stochasticity). In terms of results, the numbers\nof SD, TD, and TRD pairs are similar, but there are more pronounced differences\nfor PD and DD pairs. In particular, the total number of paralogous pairs differs\nbetween algorithms. When we remove rows from the DIAMOND output based on \nthe E-value threshold and remove self (e.g., \"gene1-gene1\") and redundant \nhits (e.g., \"gene1-gene2\" and \"gene2-gene1\"), we get the following total \nnumber of pairs:\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Get total number of paralog pairs from DIAMOND output\nevalue <- 1e-10\ndmd <- diamond_intra$arabidopsis.thaliana_arabidopsis.thaliana\n\nall_paralogs <- lapply(list(dmd), function(x) {\n    fpair <- x[x$evalue <= evalue, c(1, 2)]\n    fpair <- fpair[fpair[, 1] != fpair[, 2], ]\n    fpair <- fpair[!duplicated(t(apply(fpair, 1, sort))), ]\n    names(fpair) <- c(\"dup1\", \"dup2\")\n    return(fpair)\n})[[1]]\n```\n:::\n\n::: {.cell}\n\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nnrow(all_paralogs)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[1] 47485\n```\n\n\n:::\n:::\n\n\nThe total number of paralogous pairs in the DIAMOND output is the same as\nsum of classes for __doubletrouble__, but greater than the sum of classes\nfor __DupGen_finder__, indicating that the latter probably does some additional\n(undocumented) filtering before classifying gene pairs, or it could be a bug.\n\nFinally, since the number of PD pairs identified by __doubletrouble__ is much\ngreater than the number of PD pairs identified by __DupGen_finder__, we will \nexplore a few of these PD pairs so check whether __doubletrouble__ \nmisclassified them.\n\n\n::: {.cell}\n\n```{.r .cell-code}\npd1 <- dups1$arabidopsis.thaliana |>\n    filter(type == \"PD\")\n\npd2 <- dups2 |>\n    filter(type == \"PD\")\n\n# Get all pairs in `pd1` and `pd2`\np1 <- t(apply(pd1[, 1:2], 1, sort)) |>\n    as.data.frame() |>\n    mutate(V1 = str_replace_all(V1, \"^ara_\", \"\")) |>\n    mutate(V2 = str_replace_all(V2, \"^ara_\", \"\")) |>\n    mutate(pair_string = str_c(V1, V2, sep = \"-\")) |>\n    pull(pair_string)\n\np2 <- t(apply(pd2[, 1:2], 1, sort)) |>\n    as.data.frame() |>\n    mutate(pair_string = str_c(V1, V2, sep = \"-\")) |>\n    pull(pair_string)\n\n# Sample PD pairs from doubletrouble that are not PD pairs in DupGen_finder\nexamples <- p1[!p1 %in% p2] |> head(n = 5)\n\n# Check whether they are PD pairs or not\nath_annot <- pdata$annotation$arabidopsis.thaliana\nrange_examples <- lapply(examples, function(x) {\n    g1 <- paste0(\"ara_\", strsplit(x, \"-\")[[1]][1])\n    g2 <- paste0(\"ara_\", strsplit(x, \"-\")[[1]][2])\n    ranges <- ath_annot[ath_annot$gene %in% c(g1, g2)]\n    return(ranges)\n})\n```\n:::\n\n::: {.cell}\n\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nrange_examples\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[[1]]\nGRanges object with 2 ranges and 1 metadata column:\n      seqnames        ranges strand |          gene\n         <Rle>     <IRanges>  <Rle> |   <character>\n  128    ara_1 428650-430720      - | ara_AT1G02220\n  130    ara_1 437860-439559      - | ara_AT1G02250\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n\n[[2]]\nGRanges object with 2 ranges and 1 metadata column:\n      seqnames        ranges strand |          gene\n         <Rle>     <IRanges>  <Rle> |   <character>\n  127    ara_1 427548-427811      - | ara_AT1G02210\n  130    ara_1 437860-439559      - | ara_AT1G02250\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n\n[[3]]\nGRanges object with 2 ranges and 1 metadata column:\n        seqnames        ranges strand |          gene\n           <Rle>     <IRanges>  <Rle> |   <character>\n  17117    ara_4 656407-659178      - | ara_AT4G01520\n  17120    ara_4 673862-676445      - | ara_AT4G01550\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n\n[[4]]\nGRanges object with 2 ranges and 1 metadata column:\n        seqnames            ranges strand |          gene\n           <Rle>         <IRanges>  <Rle> |   <character>\n  15329    ara_3 17882465-17884515      + | ara_AT3G48290\n  15332    ara_3 17887996-17889942      + | ara_AT3G48310\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n\n[[5]]\nGRanges object with 2 ranges and 1 metadata column:\n        seqnames            ranges strand |          gene\n           <Rle>         <IRanges>  <Rle> |   <character>\n  25845    ara_5 21378702-21379744      + | ara_AT5G52720\n  25847    ara_5 21382482-21383432      + | ara_AT5G52740\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n```\n\n\n:::\n:::\n\n\nIn these first five examples of pairs that are classified as PD pairs by\n__doubletrouble__, but not by __DupGen_finder__, we can see based on the\nnumbers in row names that they are indeed very close, separated by only a few \ngenes (<10). Thus, they are true PD pairs that __DupGen_finder__ failed \nto classify as PD pairs, or removed in their undocumented filtering (described\nabove).\n\n## Session info {.unnumbered}\n\nThis document was created under the following conditions:\n\n\n::: {.cell}\n::: {.cell-output .cell-output-stdout}\n\n```\n─ Session info ───────────────────────────────────────────────────────────────\n setting  value\n version  R version 4.3.2 (2023-10-31)\n os       Ubuntu 22.04.3 LTS\n system   x86_64, linux-gnu\n ui       X11\n language (EN)\n collate  en_US.UTF-8\n ctype    en_US.UTF-8\n tz       Europe/Brussels\n date     2024-04-18\n pandoc   3.1.1 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)\n\n─ Packages ───────────────────────────────────────────────────────────────────\n package              * version     date (UTC) lib source\n abind                  1.4-5       2016-07-21 [1] CRAN (R 4.3.2)\n ade4                   1.7-22      2023-02-06 [1] CRAN (R 4.3.2)\n AnnotationDbi          1.64.1      2023-11-03 [1] Bioconductor\n ape                    5.7-1       2023-03-13 [1] CRAN (R 4.3.2)\n Biobase                2.62.0      2023-10-24 [1] Bioconductor\n BiocFileCache          2.10.1      2023-10-26 [1] Bioconductor\n BiocGenerics           0.48.1      2023-11-01 [1] Bioconductor\n BiocIO                 1.12.0      2023-10-24 [1] Bioconductor\n BiocParallel           1.37.0      2024-01-19 [1] Github (Bioconductor/BiocParallel@79a1b2d)\n biomaRt                2.58.2      2024-01-30 [1] Bioconductor 3.18 (R 4.3.2)\n Biostrings             2.70.2      2024-01-28 [1] Bioconductor 3.18 (R 4.3.2)\n bit                    4.0.5       2022-11-15 [1] CRAN (R 4.3.2)\n bit64                  4.0.5       2020-08-30 [1] CRAN (R 4.3.2)\n bitops                 1.0-7       2021-04-24 [1] CRAN (R 4.3.2)\n blob                   1.2.4       2023-03-17 [1] CRAN (R 4.3.2)\n cachem                 1.0.8       2023-05-01 [1] CRAN (R 4.3.2)\n cli                    3.6.2       2023-12-11 [1] CRAN (R 4.3.2)\n coda                   0.19-4.1    2024-01-31 [1] CRAN (R 4.3.2)\n codetools              0.2-19      2023-02-01 [4] CRAN (R 4.2.2)\n colorspace             2.1-0       2023-01-23 [1] CRAN (R 4.3.2)\n crayon                 1.5.2       2022-09-29 [1] CRAN (R 4.3.2)\n curl                   5.2.0       2023-12-08 [1] CRAN (R 4.3.2)\n DBI                    1.2.1       2024-01-12 [1] CRAN (R 4.3.2)\n dbplyr                 2.4.0       2023-10-26 [1] CRAN (R 4.3.2)\n DelayedArray           0.28.0      2023-10-24 [1] Bioconductor\n digest                 0.6.34      2024-01-11 [1] CRAN (R 4.3.2)\n doParallel             1.0.17      2022-02-07 [1] CRAN (R 4.3.2)\n doubletrouble        * 1.3.4       2024-02-05 [1] Bioconductor\n dplyr                * 1.1.4       2023-11-17 [1] CRAN (R 4.3.2)\n evaluate               0.23        2023-11-01 [1] CRAN (R 4.3.2)\n fansi                  1.0.6       2023-12-08 [1] CRAN (R 4.3.2)\n fastmap                1.1.1       2023-02-24 [1] CRAN (R 4.3.2)\n filelock               1.0.3       2023-12-11 [1] CRAN (R 4.3.2)\n forcats              * 1.0.0       2023-01-29 [1] CRAN (R 4.3.2)\n foreach                1.5.2       2022-02-02 [1] CRAN (R 4.3.2)\n generics               0.1.3       2022-07-05 [1] CRAN (R 4.3.2)\n GenomeInfoDb           1.38.6      2024-02-08 [1] Bioconductor 3.18 (R 4.3.2)\n GenomeInfoDbData       1.2.11      2023-12-21 [1] Bioconductor\n GenomicAlignments      1.38.2      2024-01-16 [1] Bioconductor 3.18 (R 4.3.2)\n GenomicFeatures        1.54.3      2024-01-31 [1] Bioconductor 3.18 (R 4.3.2)\n GenomicRanges          1.54.1      2023-10-29 [1] Bioconductor\n ggnetwork              0.5.13      2024-02-14 [1] CRAN (R 4.3.2)\n ggplot2              * 3.5.0       2024-02-23 [1] CRAN (R 4.3.2)\n glue                   1.7.0       2024-01-09 [1] CRAN (R 4.3.2)\n gtable                 0.3.4       2023-08-21 [1] CRAN (R 4.3.2)\n here                 * 1.0.1       2020-12-13 [1] CRAN (R 4.3.2)\n hms                    1.1.3       2023-03-21 [1] CRAN (R 4.3.2)\n htmltools              0.5.7       2023-11-03 [1] CRAN (R 4.3.2)\n htmlwidgets            1.6.4       2023-12-06 [1] CRAN (R 4.3.2)\n httr                   1.4.7       2023-08-15 [1] CRAN (R 4.3.2)\n igraph                 2.0.1.1     2024-01-30 [1] CRAN (R 4.3.2)\n intergraph             2.0-4       2024-02-01 [1] CRAN (R 4.3.2)\n IRanges                2.36.0      2023-10-24 [1] Bioconductor\n iterators              1.0.14      2022-02-05 [1] CRAN (R 4.3.2)\n jsonlite               1.8.8       2023-12-04 [1] CRAN (R 4.3.2)\n KEGGREST               1.42.0      2023-10-24 [1] Bioconductor\n knitr                  1.45        2023-10-30 [1] CRAN (R 4.3.2)\n lattice                0.22-5      2023-10-24 [4] CRAN (R 4.3.1)\n lifecycle              1.0.4       2023-11-07 [1] CRAN (R 4.3.2)\n lubridate            * 1.9.3       2023-09-27 [1] CRAN (R 4.3.2)\n magrittr               2.0.3       2022-03-30 [1] CRAN (R 4.3.2)\n MASS                   7.3-60      2023-05-04 [4] CRAN (R 4.3.1)\n Matrix                 1.6-3       2023-11-14 [4] CRAN (R 4.3.2)\n MatrixGenerics         1.14.0      2023-10-24 [1] Bioconductor\n matrixStats            1.2.0       2023-12-11 [1] CRAN (R 4.3.2)\n mclust                 6.0.1       2023-11-15 [1] CRAN (R 4.3.2)\n memoise                2.0.1       2021-11-26 [1] CRAN (R 4.3.2)\n MSA2dist               1.7.3       2024-03-14 [1] gitlab (mpievolbio-it/msa2dist@7558ab2)\n munsell                0.5.0       2018-06-12 [1] CRAN (R 4.3.2)\n network                1.18.2      2023-12-05 [1] CRAN (R 4.3.2)\n networkD3              0.4         2017-03-18 [1] CRAN (R 4.3.2)\n nlme                   3.1-163     2023-08-09 [4] CRAN (R 4.3.1)\n patchwork            * 1.2.0       2024-01-08 [1] CRAN (R 4.3.2)\n pheatmap               1.0.12      2019-01-04 [1] CRAN (R 4.3.2)\n pillar                 1.9.0       2023-03-22 [1] CRAN (R 4.3.2)\n pkgconfig              2.0.3       2019-09-22 [1] CRAN (R 4.3.2)\n png                    0.1-8       2022-11-29 [1] CRAN (R 4.3.2)\n prettyunits            1.2.0       2023-09-24 [1] CRAN (R 4.3.2)\n progress               1.2.3       2023-12-06 [1] CRAN (R 4.3.2)\n purrr                * 1.0.2       2023-08-10 [1] CRAN (R 4.3.2)\n R6                     2.5.1       2021-08-19 [1] CRAN (R 4.3.2)\n rappdirs               0.3.3       2021-01-31 [1] CRAN (R 4.3.2)\n RColorBrewer           1.1-3       2022-04-03 [1] CRAN (R 4.3.2)\n Rcpp                   1.0.12      2024-01-09 [1] CRAN (R 4.3.2)\n RCurl                  1.98-1.14   2024-01-09 [1] CRAN (R 4.3.2)\n readr                * 2.1.5       2024-01-10 [1] CRAN (R 4.3.2)\n restfulr               0.0.15      2022-06-16 [1] CRAN (R 4.3.2)\n rjson                  0.2.21      2022-01-09 [1] CRAN (R 4.3.2)\n rlang                  1.1.3       2024-01-10 [1] CRAN (R 4.3.2)\n rmarkdown              2.25        2023-09-18 [1] CRAN (R 4.3.2)\n rprojroot              2.0.4       2023-11-05 [1] CRAN (R 4.3.2)\n Rsamtools              2.18.0      2023-10-24 [1] Bioconductor\n RSQLite                2.3.5       2024-01-21 [1] CRAN (R 4.3.2)\n rstudioapi             0.15.0      2023-07-07 [1] CRAN (R 4.3.2)\n rtracklayer            1.62.0      2023-10-24 [1] Bioconductor\n S4Arrays               1.2.0       2023-10-24 [1] Bioconductor\n S4Vectors              0.40.2      2023-11-23 [1] Bioconductor 3.18 (R 4.3.2)\n scales                 1.3.0       2023-11-28 [1] CRAN (R 4.3.2)\n seqinr                 4.2-36      2023-12-08 [1] CRAN (R 4.3.2)\n sessioninfo            1.2.2       2021-12-06 [1] CRAN (R 4.3.2)\n SparseArray            1.2.4       2024-02-11 [1] Bioconductor 3.18 (R 4.3.2)\n statnet.common         4.9.0       2023-05-24 [1] CRAN (R 4.3.2)\n stringi                1.8.3       2023-12-11 [1] CRAN (R 4.3.2)\n stringr              * 1.5.1       2023-11-14 [1] CRAN (R 4.3.2)\n SummarizedExperiment   1.32.0      2023-10-24 [1] Bioconductor\n syntenet               1.4.0       2023-10-24 [1] Bioconductor\n tibble               * 3.2.1       2023-03-20 [1] CRAN (R 4.3.2)\n tidyr                * 1.3.1       2024-01-24 [1] CRAN (R 4.3.2)\n tidyselect             1.2.0       2022-10-10 [1] CRAN (R 4.3.2)\n tidyverse            * 2.0.0       2023-02-22 [1] CRAN (R 4.3.2)\n timechange             0.3.0       2024-01-18 [1] CRAN (R 4.3.2)\n tzdb                   0.4.0       2023-05-12 [1] CRAN (R 4.3.2)\n utf8                   1.2.4       2023-10-22 [1] CRAN (R 4.3.2)\n vctrs                  0.6.5       2023-12-01 [1] CRAN (R 4.3.2)\n withr                  3.0.0       2024-01-16 [1] CRAN (R 4.3.2)\n xfun                   0.42        2024-02-08 [1] CRAN (R 4.3.2)\n XML                    3.99-0.16.1 2024-01-22 [1] CRAN (R 4.3.2)\n xml2                   1.3.6       2023-12-04 [1] CRAN (R 4.3.2)\n XVector                0.42.0      2023-10-24 [1] Bioconductor\n yaml                   2.3.8       2023-12-11 [1] CRAN (R 4.3.2)\n zlibbioc               1.48.0      2023-10-24 [1] Bioconductor\n\n [1] /home/faalm/R/x86_64-pc-linux-gnu-library/4.3\n [2] /usr/local/lib/R/site-library\n [3] /usr/lib/R/site-library\n [4] /usr/lib/R/library\n\n──────────────────────────────────────────────────────────────────────────────\n```\n\n\n:::\n:::\n",
+    "markdown": "---\nexecute: \n  echo: true\n  eval: false\n  warning: false\n---\n\n\n# Runtime benchmark\n\nHere, we will perform a runtime benchmark for functions related to duplicate \nclassification and substitution rates calculation using model organisms.\n\nTo start, let's load the required data and packages.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nset.seed(123) # for reproducibility\n\n# Load required packages\nlibrary(doubletrouble)\nlibrary(here)\nlibrary(tidyverse)\nlibrary(patchwork)\n\nsource(here(\"code\", \"utils.R\"))\n\n# Load sample metadata for Ensembl instances\nload(here(\"products\", \"result_files\", \"metadata_all.rda\"))\n```\n:::\n\n::: {.cell}\n\n:::\n\n\n## Benchmark 1: `classify_gene_pairs()`\n\nHere, we will benchmark the performance of `classify_gene_pairs()`\nwith model organisms.\n\nFirst, let's get the genome and annotation data.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create a data frame with names of model species and their Ensembl instances\nmodel_species <- data.frame(\n    species = c(\n        \"arabidopsis_thaliana\", \"caenorhabditis_elegans\", \n        \"homo_sapiens\", \"saccharomyces_cerevisiae\",\n        \"drosophila_melanogaster\", \"danio_rerio\"\n    ),\n    instance = c(\n        \"plants\", \"metazoa\", \"ensembl\", \"fungi\", \"metazoa\", \"ensembl\"\n    )\n)\n\n# For each organism, download data, and identify and classify duplicates\nmodel_duplicates <- lapply(seq_len(nrow(model_species)), function(x) {\n    \n    species <- model_species$species[x]\n    instance <- model_species$instance[x]\n    \n    # Get annotation\n    annot <- get_annotation(model_species[x, ], instance)\n    \n    # Get proteome and keep only primary transcripts\n    seq <- get_proteomes(model_species[x, ], instance)\n    seq <- filter_sequences(seq, annot)\n    \n    # Process data\n    pdata <- syntenet::process_input(seq, annot, filter_annotation = TRUE)\n    \n    # Perform DIAMOND search\n    outdir <- file.path(tempdir(), paste0(species, \"_intra\"))\n    diamond <- syntenet::run_diamond(\n        seq = pdata$seq,\n        compare = \"intraspecies\", \n        outdir = outdir,\n        threads = 4,\n        ... = \"--sensitive\"\n    )\n    \n    fs::dir_delete(outdir)\n    \n    # Classify duplicates - standard mode\n    start <- Sys.time()\n    duplicate_pairs <- classify_gene_pairs(\n            blast_list = diamond,\n            annotation = pdata$annotation,\n            scheme = \"standard\"\n        )[[1]]\n    end <- Sys.time()\n    runtime <- end - start\n    \n    return(runtime)\n})\nnames(model_duplicates) <- gsub(\"_\", \" \", str_to_title(model_species$species))\n\n# Summarize results in a table\nbenchmark_classification <- data.frame(\n    species = names(model_duplicates),\n    time_seconds = as.numeric(unlist(model_duplicates))\n)\n\n# Save results\nsave(\n    benchmark_classification, compress = \"xz\",\n    file = here(\"products\", \"result_files\", \"benchmark_classification.rda\")\n)\n```\n:::\n\n\n## Benchmark 2: `pairs2kaks()`\n\nNext, we will benchmark the performance of `pairs2kaks()` for\nduplicate pairs in the *Saccharomyces cerevisiae* genome.\nWe will do it using a single thread, and using parallelization (with\n4 and 8 threads).\n\nFirst of all, let's get the required data for `pairs2kaks()`.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Load duplicate pairs for S. cerevisiae\nload(here(\"products\", \"result_files\", \"fungi_duplicates.rda\"))\nscerevisiae_pairs <- fungi_duplicates[\"saccharomyces_cerevisiae\"]\n\n# Get CDS for S. cerevisiae\nscerevisiae_cds <- get_cds_ensembl(\"saccharomyces_cerevisiae\", \"fungi\")\n```\n:::\n\n\nNow, we can do the benchmark. \n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Parallel back-end: SerialParam (1 thread)\nstart <- Sys.time()\nkaks <- pairs2kaks(\n    scerevisiae_pairs, \n    scerevisiae_cds,\n    bp_param = BiocParallel::SerialParam()\n)\nend <- Sys.time()\nruntime_serial <- end - start\n\n# Parallel back-end: SnowParam, 4 threads\nstart <- Sys.time()\nkaks <- pairs2kaks(\n    scerevisiae_pairs, \n    scerevisiae_cds,\n    bp_param = BiocParallel::SnowParam(workers = 4)\n)\nend <- Sys.time()\nruntime_snow4 <- end - start\n\n# Parallel back-end: SnowParam, 8 threads\nstart <- Sys.time()\nkaks <- pairs2kaks(\n    scerevisiae_pairs, \n    scerevisiae_cds,\n    bp_param = BiocParallel::SnowParam(workers = 8)\n)\nend <- Sys.time()\nruntime_snow8 <- end - start\n\n# Summarize results in a table\nbenchmark_kaks <- data.frame(\n    `Back-end` = c(\"Serial\", \"Snow, 4 threads\", \"Snow, 8 threads\"),\n    Time_minutes = as.numeric(c(runtime_serial, runtime_snow4, runtime_snow8))\n) |>\n    dplyr::mutate(\n        Pairs_per_minute = nrow(scerevisiae_pairs[[1]]) / Time_minutes,\n        Pairs_per_second = nrow(scerevisiae_pairs[[1]]) / (Time_minutes * 60)\n    )\n\nsave(\n    benchmark_kaks, compress = \"xz\",\n    file = here(\"products\", \"result_files\", \"benchmark_kaks.rda\")\n)\n```\n:::\n\n\n## Benchmark 3: **doubletrouble** vs __DupGen_finder__\n\nHere, we will classify duplicate pairs in the *A. thaliana* genome \nusing __doubletrouble__ and DupGen_finder to assess if they produce the same \nresults, and compare their runtimes. \n\nFirst, let's get all data we need (proteomes, annotation, and DIAMOND tables).\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Get annotation\nsmeta <- data.frame(\n    species = c(\"arabidopsis_thaliana\", \"amborella_trichopoda\"),\n    instance = \"plants\"\n)\nannot <- get_annotation(smeta, \"plants\")\n\n# Get proteome and keep only primary transcripts\nseq <- get_proteomes(smeta, \"plants\")\nseq <- filter_sequences(seq, annot)\n    \n# Process data\npdata <- syntenet::process_input(seq, annot, filter_annotation = TRUE)\n    \n# Run intraspecies DIAMOND search\noutdir <- file.path(tempdir(), \"benchmark3_intra\")\ndiamond_intra <- syntenet::run_diamond(\n    seq = pdata$seq,\n    compare = \"intraspecies\", \n    outdir = outdir,\n    threads = 4,\n    ... = \"--sensitive\"\n)\nfs::dir_delete(outdir)\n\n# Run interspecies DIAMOND search\noutdir2 <- file.path(tempdir(), \"benchmark3_inter\")\ncompare_df <- data.frame(\n    query = \"arabidopsis.thaliana\", target = \"amborella.trichopoda\"\n)\n\ndiamond_inter <- syntenet::run_diamond(\n    seq = pdata$seq,\n    compare = compare_df, \n    outdir = outdir2,\n    threads = 4,\n    ... = \"--sensitive\"\n)\nfs::dir_delete(outdir2)\n```\n:::\n\n\nNow, let's classify duplicates with __doubletrouble__.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Classify duplicates with doubletrouble\nstart1 <- Sys.time()\ndups1 <- classify_gene_pairs(\n    blast_list = diamond_intra[2],\n    annotation = pdata$annotation,\n    blast_inter = diamond_inter,\n    scheme = \"extended\",\n    collinearity_dir = here(\"products\")\n)\nend1 <- Sys.time()\nruntime1 <- end1 - start1\n```\n:::\n\n\nNext, we will classify duplicates with DupGen_finder. For that, we will first\nexport input data in the required format.\n\n\n::: {.cell}\n\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\n# Export data\n## .blast files\nb1 <- diamond_intra$arabidopsis.thaliana_arabidopsis.thaliana |>\n    mutate(\n        query = str_replace_all(query, \"^ara_\", \"\"),\n        db = str_replace_all(db, \"^ara_\", \"\")\n    )\n\nb2 <- diamond_inter$arabidopsis.thaliana_amborella.trichopoda |>\n    mutate(\n        query = str_replace_all(query, \"^ara_\", \"\"),\n        db = str_replace_all(db, \"^amb_\", \"\")\n    )\n\nwrite_tsv(b1, file = file.path(tempdir(), \"Ath.blast\"), col_names = FALSE)\nwrite_tsv(b2, file = file.path(tempdir(), \"Ath_Atr.blast\"), col_names = FALSE)\n\n## .gff files\ngff1 <- pdata$annotation$arabidopsis.thaliana |>\n    as.data.frame() |>\n    mutate(gene = str_replace_all(gene, \"^ara_\", \"\")) |>\n    mutate(seqnames = str_replace_all(seqnames, \"ara_\", \"ara-\")) |>\n    dplyr::select(seqnames, gene, start, end)\n\ngff2 <- pdata$annotation$amborella.trichopoda |>\n    as.data.frame() |>\n    mutate(gene = str_replace_all(gene, \"^amb_\", \"\")) |>\n    mutate(seqnames = str_replace_all(seqnames, \"^amb_\", \"amb-\")) |>\n    dplyr::select(seqnames, gene, start, end)\n\ngff2 <- bind_rows(gff1, gff2)\n\nwrite_tsv(gff1, file = file.path(tempdir(), \"Ath.gff\"), col_names = FALSE)\nwrite_tsv(gff2, file = file.path(tempdir(), \"Ath_Atr.gff\"), col_names = FALSE)\n\n# Classify duplicates with DupGen_finder\nargs = c(\n    \"-i\", tempdir(), \"-t Ath -c Atr\",\n    \"-o\", file.path(tempdir(), \"results\"),\n    \"-e 1e-10\"\n)\n\nstart2 <- Sys.time()\nsystem2(\"DupGen_finder.pl\", args = args)\nend2 <- Sys.time()\nruntime2 <- end2 - start2\n```\n:::\n\n\nNow, let's compare both algorithms in terms of runtime and results.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Load `DupGen_finder.pl` results\nfiles <- c(\n    \"Ath.wgd.pairs\", \"Ath.tandem.pairs\", \"Ath.proximal.pairs\",\n    \"Ath.transposed.pairs\", \"Ath.dispersed.pairs\"\n)\nfiles <- file.path(tempdir(), \"results\", files)\nnames(files) <- c(\"SD\", \"TD\", \"PD\", \"TRD\", \"DD\")\ndups2 <- Reduce(rbind, lapply(seq_along(files), function(x) {\n    d <- read_tsv(files[x], show_col_types = FALSE) |>\n        mutate(type = names(files)[x]) |>\n        select(1, 3, type) |>\n        as.data.frame()\n    \n    names(d)[c(1,2)] <- c(\"dup1\", \"dup2\")\n    \n    return(d)\n}))\n\n# Compare runtime\ncomp_runtime <- data.frame(doubletrouble = runtime1, DupGen_finder = runtime2)\n\n# Compare number of gene pairs per category\ncomp_results <- inner_join(\n    count(dups1$arabidopsis.thaliana, type) |> \n        dplyr::rename(n_doubletrouble = n),\n    count(dups2, type) |>\n        dplyr::rename(n_DupGen_finder = n),\n    by = \"type\"\n)\ncomp_results <- bind_rows(\n    comp_results, \n    data.frame(\n        type = \"Total\", \n        n_doubletrouble = sum(comp_results$n_doubletrouble),\n        n_DupGen_finder = sum(comp_results$n_DupGen_finder)\n    )\n)\n```\n:::\n\n::: {.cell}\n\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nlist(runtime = comp_runtime, results = comp_results)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n$runtime\n  doubletrouble DupGen_finder\n1  3.71474 secs 3.373682 secs\n\n$results\n   type n_doubletrouble n_DupGen_finder\n1    SD            4329            4355\n2    TD            2075            2131\n3    PD            2789             896\n4   TRD            6703            5941\n5    DD           31589           17834\n6 Total           47485           31157\n```\n\n\n:::\n:::\n\n\nOverall, results are similar, but there are important differences. In terms\nof runtime, there is no significant difference (this is the runtime of a single\nrun, so there's some stochasticity). In terms of results, the numbers\nof SD, TD, and TRD pairs are similar, but there are more pronounced differences\nfor PD and DD pairs. In particular, the total number of paralogous pairs differs\nbetween algorithms. When we remove rows from the DIAMOND output based on \nthe E-value threshold and remove self (e.g., \"gene1-gene1\") and redundant \nhits (e.g., \"gene1-gene2\" and \"gene2-gene1\"), we get the following total \nnumber of pairs:\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Get total number of paralog pairs from DIAMOND output\nevalue <- 1e-10\ndmd <- diamond_intra$arabidopsis.thaliana_arabidopsis.thaliana\n\nall_paralogs <- lapply(list(dmd), function(x) {\n    fpair <- x[x$evalue <= evalue, c(1, 2)]\n    fpair <- fpair[fpair[, 1] != fpair[, 2], ]\n    fpair <- fpair[!duplicated(t(apply(fpair, 1, sort))), ]\n    names(fpair) <- c(\"dup1\", \"dup2\")\n    return(fpair)\n})[[1]]\n```\n:::\n\n::: {.cell}\n\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nnrow(all_paralogs)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[1] 47485\n```\n\n\n:::\n:::\n\n\nThe total number of paralogous pairs in the DIAMOND output is the same as\nsum of classes for __doubletrouble__, but greater than the sum of classes\nfor __DupGen_finder__, indicating that the latter probably does some additional\n(undocumented) filtering before classifying gene pairs, or it could be a bug.\n\nFinally, since the number of PD pairs identified by __doubletrouble__ is much\ngreater than the number of PD pairs identified by __DupGen_finder__, we will \nexplore a few of these PD pairs so check whether __doubletrouble__ \nmisclassified them.\n\n\n::: {.cell}\n\n```{.r .cell-code}\npd1 <- dups1$arabidopsis.thaliana |>\n    filter(type == \"PD\")\n\npd2 <- dups2 |>\n    filter(type == \"PD\")\n\n# Get all pairs in `pd1` and `pd2`\np1 <- t(apply(pd1[, 1:2], 1, sort)) |>\n    as.data.frame() |>\n    mutate(V1 = str_replace_all(V1, \"^ara_\", \"\")) |>\n    mutate(V2 = str_replace_all(V2, \"^ara_\", \"\")) |>\n    mutate(pair_string = str_c(V1, V2, sep = \"-\")) |>\n    pull(pair_string)\n\np2 <- t(apply(pd2[, 1:2], 1, sort)) |>\n    as.data.frame() |>\n    mutate(pair_string = str_c(V1, V2, sep = \"-\")) |>\n    pull(pair_string)\n\n# Sample PD pairs from doubletrouble that are not PD pairs in DupGen_finder\nexamples <- p1[!p1 %in% p2] |> head(n = 5)\n\n# Check whether they are PD pairs or not\nath_annot <- pdata$annotation$arabidopsis.thaliana\nrange_examples <- lapply(examples, function(x) {\n    g1 <- paste0(\"ara_\", strsplit(x, \"-\")[[1]][1])\n    g2 <- paste0(\"ara_\", strsplit(x, \"-\")[[1]][2])\n    ranges <- ath_annot[ath_annot$gene %in% c(g1, g2)]\n    return(ranges)\n})\n```\n:::\n\n::: {.cell}\n\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nrange_examples\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[[1]]\nGRanges object with 2 ranges and 1 metadata column:\n      seqnames        ranges strand |          gene\n         <Rle>     <IRanges>  <Rle> |   <character>\n  128    ara_1 428650-430720      - | ara_AT1G02220\n  130    ara_1 437860-439559      - | ara_AT1G02250\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n\n[[2]]\nGRanges object with 2 ranges and 1 metadata column:\n      seqnames        ranges strand |          gene\n         <Rle>     <IRanges>  <Rle> |   <character>\n  127    ara_1 427548-427811      - | ara_AT1G02210\n  130    ara_1 437860-439559      - | ara_AT1G02250\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n\n[[3]]\nGRanges object with 2 ranges and 1 metadata column:\n        seqnames        ranges strand |          gene\n           <Rle>     <IRanges>  <Rle> |   <character>\n  17117    ara_4 656407-659178      - | ara_AT4G01520\n  17120    ara_4 673862-676445      - | ara_AT4G01550\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n\n[[4]]\nGRanges object with 2 ranges and 1 metadata column:\n        seqnames            ranges strand |          gene\n           <Rle>         <IRanges>  <Rle> |   <character>\n  15329    ara_3 17882465-17884515      + | ara_AT3G48290\n  15332    ara_3 17887996-17889942      + | ara_AT3G48310\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n\n[[5]]\nGRanges object with 2 ranges and 1 metadata column:\n        seqnames            ranges strand |          gene\n           <Rle>         <IRanges>  <Rle> |   <character>\n  25845    ara_5 21378702-21379744      + | ara_AT5G52720\n  25847    ara_5 21382482-21383432      + | ara_AT5G52740\n  -------\n  seqinfo: 7 sequences from an unspecified genome; no seqlengths\n```\n\n\n:::\n:::\n\n\nIn these first five examples of pairs that are classified as PD pairs by\n__doubletrouble__, but not by __DupGen_finder__, we can see based on the\nnumbers in row names that they are indeed very close, separated by only a few \ngenes (<10). Thus, they are true PD pairs that __DupGen_finder__ failed \nto classify as PD pairs, or removed in their undocumented filtering (described\nabove).\n\n## Session info {.unnumbered}\n\nThis document was created under the following conditions:\n\n\n::: {.cell}\n::: {.cell-output .cell-output-stdout}\n\n```\n─ Session info ───────────────────────────────────────────────────────────────\n setting  value\n version  R version 4.3.2 (2023-10-31)\n os       Ubuntu 22.04.3 LTS\n system   x86_64, linux-gnu\n ui       X11\n language (EN)\n collate  en_US.UTF-8\n ctype    en_US.UTF-8\n tz       Europe/Brussels\n date     2024-04-18\n pandoc   3.1.1 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)\n\n─ Packages ───────────────────────────────────────────────────────────────────\n package              * version     date (UTC) lib source\n abind                  1.4-5       2016-07-21 [1] CRAN (R 4.3.2)\n ade4                   1.7-22      2023-02-06 [1] CRAN (R 4.3.2)\n AnnotationDbi          1.64.1      2023-11-03 [1] Bioconductor\n ape                    5.7-1       2023-03-13 [1] CRAN (R 4.3.2)\n Biobase                2.62.0      2023-10-24 [1] Bioconductor\n BiocFileCache          2.10.1      2023-10-26 [1] Bioconductor\n BiocGenerics           0.48.1      2023-11-01 [1] Bioconductor\n BiocIO                 1.12.0      2023-10-24 [1] Bioconductor\n BiocParallel           1.37.0      2024-01-19 [1] Github (Bioconductor/BiocParallel@79a1b2d)\n biomaRt                2.58.2      2024-01-30 [1] Bioconductor 3.18 (R 4.3.2)\n Biostrings             2.70.2      2024-01-28 [1] Bioconductor 3.18 (R 4.3.2)\n bit                    4.0.5       2022-11-15 [1] CRAN (R 4.3.2)\n bit64                  4.0.5       2020-08-30 [1] CRAN (R 4.3.2)\n bitops                 1.0-7       2021-04-24 [1] CRAN (R 4.3.2)\n blob                   1.2.4       2023-03-17 [1] CRAN (R 4.3.2)\n cachem                 1.0.8       2023-05-01 [1] CRAN (R 4.3.2)\n cli                    3.6.2       2023-12-11 [1] CRAN (R 4.3.2)\n coda                   0.19-4.1    2024-01-31 [1] CRAN (R 4.3.2)\n codetools              0.2-19      2023-02-01 [4] CRAN (R 4.2.2)\n colorspace             2.1-0       2023-01-23 [1] CRAN (R 4.3.2)\n crayon                 1.5.2       2022-09-29 [1] CRAN (R 4.3.2)\n curl                   5.2.0       2023-12-08 [1] CRAN (R 4.3.2)\n DBI                    1.2.1       2024-01-12 [1] CRAN (R 4.3.2)\n dbplyr                 2.4.0       2023-10-26 [1] CRAN (R 4.3.2)\n DelayedArray           0.28.0      2023-10-24 [1] Bioconductor\n digest                 0.6.34      2024-01-11 [1] CRAN (R 4.3.2)\n doParallel             1.0.17      2022-02-07 [1] CRAN (R 4.3.2)\n doubletrouble        * 1.3.4       2024-02-05 [1] Bioconductor\n dplyr                * 1.1.4       2023-11-17 [1] CRAN (R 4.3.2)\n evaluate               0.23        2023-11-01 [1] CRAN (R 4.3.2)\n fansi                  1.0.6       2023-12-08 [1] CRAN (R 4.3.2)\n fastmap                1.1.1       2023-02-24 [1] CRAN (R 4.3.2)\n filelock               1.0.3       2023-12-11 [1] CRAN (R 4.3.2)\n forcats              * 1.0.0       2023-01-29 [1] CRAN (R 4.3.2)\n foreach                1.5.2       2022-02-02 [1] CRAN (R 4.3.2)\n generics               0.1.3       2022-07-05 [1] CRAN (R 4.3.2)\n GenomeInfoDb           1.38.6      2024-02-08 [1] Bioconductor 3.18 (R 4.3.2)\n GenomeInfoDbData       1.2.11      2023-12-21 [1] Bioconductor\n GenomicAlignments      1.38.2      2024-01-16 [1] Bioconductor 3.18 (R 4.3.2)\n GenomicFeatures        1.54.3      2024-01-31 [1] Bioconductor 3.18 (R 4.3.2)\n GenomicRanges          1.54.1      2023-10-29 [1] Bioconductor\n ggnetwork              0.5.13      2024-02-14 [1] CRAN (R 4.3.2)\n ggplot2              * 3.5.0       2024-02-23 [1] CRAN (R 4.3.2)\n glue                   1.7.0       2024-01-09 [1] CRAN (R 4.3.2)\n gtable                 0.3.4       2023-08-21 [1] CRAN (R 4.3.2)\n here                 * 1.0.1       2020-12-13 [1] CRAN (R 4.3.2)\n hms                    1.1.3       2023-03-21 [1] CRAN (R 4.3.2)\n htmltools              0.5.7       2023-11-03 [1] CRAN (R 4.3.2)\n htmlwidgets            1.6.4       2023-12-06 [1] CRAN (R 4.3.2)\n httr                   1.4.7       2023-08-15 [1] CRAN (R 4.3.2)\n igraph                 2.0.1.1     2024-01-30 [1] CRAN (R 4.3.2)\n intergraph             2.0-4       2024-02-01 [1] CRAN (R 4.3.2)\n IRanges                2.36.0      2023-10-24 [1] Bioconductor\n iterators              1.0.14      2022-02-05 [1] CRAN (R 4.3.2)\n jsonlite               1.8.8       2023-12-04 [1] CRAN (R 4.3.2)\n KEGGREST               1.42.0      2023-10-24 [1] Bioconductor\n knitr                  1.45        2023-10-30 [1] CRAN (R 4.3.2)\n lattice                0.22-5      2023-10-24 [4] CRAN (R 4.3.1)\n lifecycle              1.0.4       2023-11-07 [1] CRAN (R 4.3.2)\n lubridate            * 1.9.3       2023-09-27 [1] CRAN (R 4.3.2)\n magrittr               2.0.3       2022-03-30 [1] CRAN (R 4.3.2)\n MASS                   7.3-60      2023-05-04 [4] CRAN (R 4.3.1)\n Matrix                 1.6-3       2023-11-14 [4] CRAN (R 4.3.2)\n MatrixGenerics         1.14.0      2023-10-24 [1] Bioconductor\n matrixStats            1.2.0       2023-12-11 [1] CRAN (R 4.3.2)\n mclust                 6.0.1       2023-11-15 [1] CRAN (R 4.3.2)\n memoise                2.0.1       2021-11-26 [1] CRAN (R 4.3.2)\n MSA2dist               1.7.3       2024-03-14 [1] gitlab (mpievolbio-it/msa2dist@7558ab2)\n munsell                0.5.0       2018-06-12 [1] CRAN (R 4.3.2)\n network                1.18.2      2023-12-05 [1] CRAN (R 4.3.2)\n networkD3              0.4         2017-03-18 [1] CRAN (R 4.3.2)\n nlme                   3.1-163     2023-08-09 [4] CRAN (R 4.3.1)\n patchwork            * 1.2.0       2024-01-08 [1] CRAN (R 4.3.2)\n pheatmap               1.0.12      2019-01-04 [1] CRAN (R 4.3.2)\n pillar                 1.9.0       2023-03-22 [1] CRAN (R 4.3.2)\n pkgconfig              2.0.3       2019-09-22 [1] CRAN (R 4.3.2)\n png                    0.1-8       2022-11-29 [1] CRAN (R 4.3.2)\n prettyunits            1.2.0       2023-09-24 [1] CRAN (R 4.3.2)\n progress               1.2.3       2023-12-06 [1] CRAN (R 4.3.2)\n purrr                * 1.0.2       2023-08-10 [1] CRAN (R 4.3.2)\n R6                     2.5.1       2021-08-19 [1] CRAN (R 4.3.2)\n rappdirs               0.3.3       2021-01-31 [1] CRAN (R 4.3.2)\n RColorBrewer           1.1-3       2022-04-03 [1] CRAN (R 4.3.2)\n Rcpp                   1.0.12      2024-01-09 [1] CRAN (R 4.3.2)\n RCurl                  1.98-1.14   2024-01-09 [1] CRAN (R 4.3.2)\n readr                * 2.1.5       2024-01-10 [1] CRAN (R 4.3.2)\n restfulr               0.0.15      2022-06-16 [1] CRAN (R 4.3.2)\n rjson                  0.2.21      2022-01-09 [1] CRAN (R 4.3.2)\n rlang                  1.1.3       2024-01-10 [1] CRAN (R 4.3.2)\n rmarkdown              2.25        2023-09-18 [1] CRAN (R 4.3.2)\n rprojroot              2.0.4       2023-11-05 [1] CRAN (R 4.3.2)\n Rsamtools              2.18.0      2023-10-24 [1] Bioconductor\n RSQLite                2.3.5       2024-01-21 [1] CRAN (R 4.3.2)\n rstudioapi             0.15.0      2023-07-07 [1] CRAN (R 4.3.2)\n rtracklayer            1.62.0      2023-10-24 [1] Bioconductor\n S4Arrays               1.2.0       2023-10-24 [1] Bioconductor\n S4Vectors              0.40.2      2023-11-23 [1] Bioconductor 3.18 (R 4.3.2)\n scales                 1.3.0       2023-11-28 [1] CRAN (R 4.3.2)\n seqinr                 4.2-36      2023-12-08 [1] CRAN (R 4.3.2)\n sessioninfo            1.2.2       2021-12-06 [1] CRAN (R 4.3.2)\n SparseArray            1.2.4       2024-02-11 [1] Bioconductor 3.18 (R 4.3.2)\n statnet.common         4.9.0       2023-05-24 [1] CRAN (R 4.3.2)\n stringi                1.8.3       2023-12-11 [1] CRAN (R 4.3.2)\n stringr              * 1.5.1       2023-11-14 [1] CRAN (R 4.3.2)\n SummarizedExperiment   1.32.0      2023-10-24 [1] Bioconductor\n syntenet               1.4.0       2023-10-24 [1] Bioconductor\n tibble               * 3.2.1       2023-03-20 [1] CRAN (R 4.3.2)\n tidyr                * 1.3.1       2024-01-24 [1] CRAN (R 4.3.2)\n tidyselect             1.2.0       2022-10-10 [1] CRAN (R 4.3.2)\n tidyverse            * 2.0.0       2023-02-22 [1] CRAN (R 4.3.2)\n timechange             0.3.0       2024-01-18 [1] CRAN (R 4.3.2)\n tzdb                   0.4.0       2023-05-12 [1] CRAN (R 4.3.2)\n utf8                   1.2.4       2023-10-22 [1] CRAN (R 4.3.2)\n vctrs                  0.6.5       2023-12-01 [1] CRAN (R 4.3.2)\n withr                  3.0.0       2024-01-16 [1] CRAN (R 4.3.2)\n xfun                   0.42        2024-02-08 [1] CRAN (R 4.3.2)\n XML                    3.99-0.16.1 2024-01-22 [1] CRAN (R 4.3.2)\n xml2                   1.3.6       2023-12-04 [1] CRAN (R 4.3.2)\n XVector                0.42.0      2023-10-24 [1] Bioconductor\n yaml                   2.3.8       2023-12-11 [1] CRAN (R 4.3.2)\n zlibbioc               1.48.0      2023-10-24 [1] Bioconductor\n\n [1] /home/faalm/R/x86_64-pc-linux-gnu-library/4.3\n [2] /usr/local/lib/R/site-library\n [3] /usr/lib/R/site-library\n [4] /usr/lib/R/library\n\n──────────────────────────────────────────────────────────────────────────────\n```\n\n\n:::\n:::\n",
     "supporting": [],
     "filters": [
       "rmarkdown/pagebreak.lua"