-
Notifications
You must be signed in to change notification settings - Fork 1
/
bioanalyzer_profile.R
95 lines (76 loc) · 3.09 KB
/
bioanalyzer_profile.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
require(plyr)
#
# Functions
#
# Read Sample CSV and do some fixing on the Data Frame
ReadBioanalyzerCSV = function(x) {
csv <- read.csv(x, skip = 17, stringsAsFactors = F)
csv <- csv[seq(1,nrow(csv)-2,1),]
csv <- as.data.frame(apply(csv, 2, as.numeric))
csv <-csv[!is.na(csv$Time), ]
return(csv)
}
# Fix that nasty comma in the quote value
FixAndSplit = function(x) {
strsplit(
gsub("\"(\\d+),(\\d+.*)\"","\\1\\2", x, perl=TRUE),
",")
}
# Read Results file, use a Regex to extract the right table,
# split by lines and return a table
ReadLadderScale = function(f) {
ladder.list = strsplit(
gsub(".*Sample Name,Ladder.*Peak Table\r\n.*Time corrected area\r\n(.*)\r\n \r\nOverall.*","\\1",
readChar(f, file.info(f)$size)
), "\r\n"
)[[1]]
# Convert into a Data Frame after fixing the ugly comma-in-quotes issue
ladder.table = ldply(lapply(llply(ladder.list, FixAndSplit), ldply))
# Add colnames (not parsed due to encoding issues)
colnames(ladder.table) = c("Size","Concentration",
"Molarity","Observations",
"Area","Aligned_Migration_Time",
"Peak_Height","Peak_Width","PC_of_Total",
"Time_corrected_area")
ladder.table[,c(1,2,3,5,6,7,8,9,10)] = apply(ladder.table[,c(1,2,3,5,6,7,8,9,10)], 2, as.numeric)
return(ladder.table)
}
# Plot original and hypothetical scale
plotScale = function(l.p, l.s, fit.scale=300) {
par(pty="s", mfrow=c(1,2))
### Plot time against size.
plot(l.p, y=l.s, xlab="Ladder Positions", ylab="Ladder Sizes")
### The nonlinear relationship has to be found.
### In this case logistic function gives a good fit
plot(l.p, y=plogis(l.s, scale=fit.scale), xlab="Ladder Positions", ylab="Logistic function")
cr = cor(plogis(l.s, scale=fit.scale), l.p)
legend("topleft", paste("Corr = ", cr, sep=""), bty="n")
}
# Fit logistic function to the data
fitScale = function(ladder.positions, ladder.size, fit.scale=300) {
### Fit a model to transform between sizes and time
fit <- glm(plogis(ladder.size, scale=fit.scale) ~ ladder.positions)
return(fit)
}
# Adjust a sample using the fitting
adjustSample = function(sp, fit, r, fit.scale=300){
sample.subset <- sp[sp$Time >= r[1] & sp$Time <= r[2], ]
sample.df <- data.frame("ladder.positions" = sample.subset$Time)
fragment.sizes <- round(as.numeric(qlogis(predict(fit, sample.df), scale=fit.scale)))
return ( data.frame(Positions=fragment.sizes, Value=sample.subset$Value) )
}
# Wrapper using previous functions to read and parse bioanalyzer data
ReadBioanalyzerData = function(d) {
# Read all files of the directory.
# Must contain: 1 *Results.csv
# Must contain: at least 1 *Sample\d.csv
input.files = list.files(d, pattern="*.csv", full.names=T)
# Get sample files and names
sample.files = input.files[grepl("Sample", input.files)]
sample.tags = gsub(".*_(.*).csv","\\1", sample.files)
# Parse the ladder data from the *Results.csv
ladder.table = ReadLadderScale( input.files[grepl("Results", input.files)] )
# Load Sample files into a named list
sample.df = setNames(llply(sample.files, ReadBioanalyzerCSV), nm=sample.tags)
return(list(Ladder=ladder.table, Samples=sample.df))
}