-
Notifications
You must be signed in to change notification settings - Fork 3
/
recovery_rates_schlaeppi.R
81 lines (56 loc) · 3.1 KB
/
recovery_rates_schlaeppi.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/opt/share/software/bin/Rscript
# scripts for 16S data analysis
#
# originally by Ruben Garrido-Oter
# garridoo@mpipz.mpg.de
args <- commandArgs(TRUE)
working_dir <- args[1]
results_dir <- args[2]
min_length <- as.numeric(args[3])
min_id <- as.numeric(args[4])
schlaeppi_otu_table.file <- paste(working_dir, "/schlaeppi/otu_table.txt", sep="")
schlaeppi_taxonomy.file <- paste(working_dir, "/schlaeppi/taxonomy.txt", sep="")
blast_results.file <- paste(results_dir, "/blast_results_schlaeppi.txt", sep="")
# load start inoculum OTU table and taxonomy infomration
schlaeppi_taxonomy <- read.table(schlaeppi_taxonomy.file, sep="\t", header=F, fill=T)
schlaeppi_otu_table <- read.table(schlaeppi_otu_table.file, sep="\t", header=T)
rownames(schlaeppi_otu_table) <- schlaeppi_otu_table[, 1]
schlaeppi_otu_table <- schlaeppi_otu_table[, -1]
# remove soil samples
idx <- !grepl("Soil", colnames(schlaeppi_otu_table))
schlaeppi_otu_table <- schlaeppi_otu_table[, idx]
# remove non-bacterial and Chloroflexi OTUs
schlaeppi_taxonomy <- schlaeppi_taxonomy[schlaeppi_taxonomy[, 2]=="k__Bacteria", ]
schlaeppi_taxonomy <- schlaeppi_taxonomy[schlaeppi_taxonomy[, 3]!="p__Chloroflexi", ]
idx <- rownames(schlaeppi_otu_table) %in% schlaeppi_taxonomy[, 1]
schlaeppi_otu_table <- schlaeppi_otu_table[idx, ]
# get top 100 OTUs and those with >.1% relative abundance
schlaeppi_otu_table_norm <- apply(schlaeppi_otu_table, 2, function(x) x/sum(x)*100)
idx <- apply(schlaeppi_otu_table_norm, 1, mean) > .1
#~ idx <- rowSums(schlaeppi_otu_table_norm > .1) == dim(schlaeppi_otu_table_norm)[2]
#~ idx <- rowSums(schlaeppi_otu_table_norm > .1) > 0
abu_otus <- rownames(schlaeppi_otu_table)[idx]
abu_otus <- gsub("^", "schlaeppi_", abu_otus)
top_100 <- names(sort(apply(schlaeppi_otu_table_norm, 1, mean), decreasing=T)[1:100])
top_100 <- gsub("^", "schlaeppi_", top_100)
# load blast results
classes <- c("factor", "factor", "numeric", "integer", "integer", "integer",
"integer", "integer", "integer", "integer", "numeric", "numeric")
blast_results <- read.table(blast_results.file, sep="\t", header=F, colClasses=classes)
blast_results <- blast_results[, c(1, 2, 3, 7, 8)]
colnames(blast_results) <- c("target", "query", "perc_id", "q_start", "q_stop")
blast_results$length <- blast_results$q_stop - blast_results$q_start + 1
# get hits that cover at least min_length of the query sequence at 97% identity
idx <- which(blast_results$perc_id >= min_id & blast_results$length >= min_length)
hits <- unique(blast_results$target[idx])
write.table(gsub(".*_OTU", "OTU", hits),
paste(results_dir, "/recovered_OTUs.txt", sep=""),
quote=F, col.names=F, row.names=F)
recovery_rate_abu <- sum(hits %in% abu_otus) * 100 / length(abu_otus)
recovery_rate_100 <- sum(hits %in% top_100) * 100 / length(top_100)
sink(paste(working_dir, "/recovery_rates.txt", sep=""), append=T)
cat("Schlaeppi et al.:\n")
cat(paste("top 100 OTUs: ", format(recovery_rate_100, digits=4), "%\n", sep=""))
cat(paste("abundant (> 0.1% R.A) OTUs: ", format(recovery_rate_abu, digits=4), "%\n", sep=""))
cat("----------------------------------\n")
sink()