-
Notifications
You must be signed in to change notification settings - Fork 4
/
final_summary_table_all.R
73 lines (65 loc) · 3.27 KB
/
final_summary_table_all.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/Rscript
# Script to parse the final summary table and append known lincRNA and/or known gene IDs
# Upendra Kumar Devisetty
# 12/04/16
library(dplyr)
# Append known lincRNA / known gene annotations to the final summary table.
#
# Inputs (paths relative to the working directory):
#   ../Homology_Search/*.csv                      tab-delimited tables that are
#                                                 merged into the summary table
#   ../Homology_Search/*mod.annotation.*sense.gff tab-delimited, header-less
#                                                 files; column V2 is a key of
#                                                 the form "<column>_<id>..."
#                                                 and V18 holds the annotation
#   final_summary_table.csv                       the summary table to extend
#
# Outputs:
#   tables present (with or without gff files):
#     final_summary_table.mod.csv  (merged table)
#     final_summary_table.mod2.csv (merged table plus any annotations)
#   only gff files present:
#     final_summary_table.mod.csv  (original table plus annotations)
#   neither present: nothing is written.

# Write `tbl` as an unquoted, tab-separated file without row names.
write_tsv <- function(tbl, path) {
  write.table(tbl, file = path, sep = "\t", quote = FALSE, row.names = FALSE)
}

# Merge every table in `csv_files` with final_summary_table.csv, write the
# result to final_summary_table.mod.csv and return it re-read from disk (the
# round trip normalises column types to what read.csv produces).
build_merged_table <- function(csv_files) {
  tables <- lapply(csv_files, read.delim)
  known <- Reduce(function(x, y) merge(x, y, all = TRUE), tables)
  full <- read.csv("final_summary_table.csv", sep = "\t")
  # Keep only the summary columns that the merged tables do not already carry;
  # drop = FALSE keeps a data frame even when a single column is left.
  extra <- full[, !names(full) %in% names(known), drop = FALSE]
  merged <- merge(known, extra, by = 1)
  # Restore the column order of the summary table, with "id" spelled "ID".
  wanted <- sub("id", "ID", names(full))
  merged <- merged[, wanted, drop = FALSE]
  write_tsv(merged, "final_summary_table.mod.csv")
  read.csv("final_summary_table.mod.csv", sep = "\t", stringsAsFactors = FALSE)
}

# Append annotations to the cells of `final`.
#
# final: data frame with an identifier column named "ID" or "id".
# hits:  data frame whose first column holds keys "<column>_<id>..." (split on
#        "_": first field = column of `final`, second field = identifier) and
#        whose second column holds the text to append.
#
# For every key whose column and identifier both exist in `final`, the cell is
# replaced by "<old value>_<annotation>". Identifiers are compared exactly, so
# an id such as "q1" never touches the row of "q10". Returns the updated frame.
append_annotations <- function(final, hits) {
  id_col <- intersect(c("ID", "id"), names(final))
  if (length(id_col) == 0) {
    warning("Summary table has no 'ID' or 'id' column; nothing annotated.",
            call. = FALSE)
    return(final)
  }
  ids <- final[[id_col[1]]]
  keys <- as.character(hits[[1]])
  labels <- as.character(hits[[2]])
  for (i in seq_len(nrow(hits))) {
    parts <- unlist(strsplit(keys[i], "_"))
    if (length(parts) < 2) {
      warning("Skipping annotation key without '<column>_<id>' form: ",
              keys[i], call. = FALSE)
      next
    }
    sp_name <- parts[[1]]
    query <- parts[[2]]
    if (!(sp_name %in% names(final))) {
      next
    }
    rows <- which(ids == query)
    if (length(rows) > 0) {
      final[rows, sp_name] <- paste(final[rows, sp_name], labels[i], sep = "_")
    }
  }
  final
}

# Apply append_annotations() for every non-empty file in `gff_files`. Each
# file is reduced to one row per V2 key, keeping the first whitespace-delimited
# token of the first V18 value seen for that key.
annotate_from_files <- function(final, gff_files) {
  info <- file.info(gff_files)
  non_empty <- rownames(info)[!is.na(info$size) & info$size != 0]
  for (f in non_empty) {
    raw <- read.csv(f, sep = "\t", header = FALSE)
    hits <- as.data.frame(
      raw %>%
        group_by(V2) %>%
        summarise(V18 = sub("^([^[:space:]]+).*", "\\1", V18[1]))
    )
    final <- append_annotations(final, hits)
  }
  final
}

files <- list.files(path = "../Homology_Search", pattern = ".csv", full.names = TRUE)
filenames <- list.files(path = "../Homology_Search", pattern = "*mod.annotation.*sense.gff", full.names = TRUE)
has_tables <- length(files) > 0
has_annotations <- length(filenames) > 0

if (has_tables) {
  final <- build_merged_table(files)
  if (has_annotations) {
    final <- annotate_from_files(final, filenames)
  }
  write_tsv(final, "final_summary_table.mod2.csv")
} else if (has_annotations) {
  final <- read.csv("final_summary_table.csv", sep = "\t", stringsAsFactors = FALSE)
  final <- annotate_from_files(final, filenames)
  # This branch has always written to the .mod.csv name; kept for downstream
  # consumers.
  write_tsv(final, "final_summary_table.mod.csv")
}