Skip to content

Commit

Permalink
Merge pull request #10 from adamd3/fix/locus-tags
Browse files (browse the repository at this point in the history)
Fix #9: Avoid mismatches when grepping gene details from gff
  • Loading branch information
adamd3 authored Jan 16, 2025
2 parents 71d520e + fac0b8d commit 699649e
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions bin/count_reads.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@ ref_annot <- subset(ref_annot, type == "gene")

# Split every gene's attribute string on ";" so that each element of
# gene_attr is a character vector of the individual attribute fields.
gene_attr <- stringr::str_split(ref_annot$attributes, ";")
# NOTE(review): in this listing each changed line appears twice, apparently
# the pre-commit form followed by the post-commit form (the diff markers are
# missing). Read as plain R, only the last expression of a function body is
# returned, so the "^"-anchored pattern is the one that takes effect.
#
# Locus tag field of each gene. The "^" anchor requires the field to START
# with the key; an unanchored "locus_tag" would also match a field that
# merely contains it (e.g. "old_locus_tag=...").
# NOTE(review): unlist() drops zero-length results, so a gene with no
# matching field contributes nothing and the vector can end up shorter than
# gene_attr -- confirm every gene row carries this attribute.
locus_tags <- unlist(lapply(gene_attr, function(x) {
x[grepl("locus_tag", x)]
x[grepl("^locus_tag", x)]
}))
# Biotype field of each gene, selected the same way (same length caveat as
# for locus_tags above).
gene_biotypes <- unlist(lapply(gene_attr, function(x) {
x[grepl("gene_biotype", x)]
x[grepl("^gene_biotype", x)]
}))
# Common gene name field ("gene=...") of each gene. Unlike the two vectors
# above, a gene without such a field is kept as an empty string.
common_gene_names <- unlist(lapply(gene_attr, function(x) {
x <- x[grepl("gene=", x)]
x <- x[grepl("^gene=", x)]
# No field matched: substitute "" so this gene still yields one element.
x[identical(x, character(0))] <- ""
x
}))
Expand All @@ -90,9 +90,9 @@ ref_gene_df <- data.frame(
gene_name = common_gene_names,
gene_length = gene_lengths
)
# Reduce each column from "key=value" to just the value.
# NOTE(review): the three gsub() lines look like the pre-commit versions and
# the three sub() lines like their post-commit replacements (diff markers
# are missing from this listing).
# gsub() form: removes the literal key text wherever it occurs in the string.
ref_gene_df$locus_tag <- gsub("locus_tag=", "", ref_gene_df$locus_tag)
ref_gene_df$biotype <- gsub("gene_biotype=", "", ref_gene_df$biotype)
ref_gene_df$gene_name <- gsub("gene=", "", ref_gene_df$gene_name)
# sub() form: removes everything from the start of the string through an "=".
# ".*" is greedy, so if a value itself contains "=", the text up to the LAST
# "=" is removed, not just the key. Strings without "=" (such as the ""
# placeholder used for missing gene names) are left unchanged.
ref_gene_df$locus_tag <- sub("^.*=", "", ref_gene_df$locus_tag)
ref_gene_df$biotype <- sub("^.*=", "", ref_gene_df$biotype)
ref_gene_df$gene_name <- sub("^.*=", "", ref_gene_df$gene_name)

write.table(
ref_gene_df, "ref_gene_df.tsv",
Expand Down

0 comments on commit 699649e

Please sign in to comment.