Skip to content

Commit

Permalink
Self organizing maps for flow
Browse files Browse the repository at this point in the history
  • Loading branch information
elijahedmondson committed Mar 23, 2022
0 parents commit 484e5a5
Show file tree
Hide file tree
Showing 29 changed files with 7,760 additions and 0 deletions.
841 changes: 841 additions & 0 deletions 0_FlowSOM.R

Large diffs are not rendered by default.

165 changes: 165 additions & 0 deletions 1_QC.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
library(flowCore)

removeMargins <- function(flowFrame,dimensions){
# Look op the accepted ranges for the dimensions
meta <- pData(flowFrame@parameters)
rownames(meta) <- meta[,"name"]

# Initialize variables
selection <- rep(TRUE,times=nrow(flowFrame))
e <- exprs(flowFrame)

# Make selection
for(d in dimensions){
selection <- selection &
e[,d] > max(meta[d,"minRange"],min(e[,d])) &
e[,d] < min(meta[d,"maxRange"],max(e[,d]))
}
return(selection)
}

selectGoodQuality <- function(dir,file,comp,toTransform,
checkMargins,
markersToPlot,
binTime = 0.1,
useCountLower = TRUE, useCountUpper= TRUE, countMax=600, countThreshold = 100,
timeMin = NULL, timeMax=NULL,
removeMargins = TRUE,
writeOutput = TRUE, outputDir = "QC",
showPlot = TRUE){
# Read the file
ff <- read.FCS(file.path(dir,file))
ff <- compensate(ff,comp)
colnames(ff)[which(colnames(ff) %in% colnames(comp))] <- paste0("Comp-",colnames(comp))

# Change time to seconds
toSec <- function(str){
str <- as.numeric(strsplit(str,':')[[1]])
60*60*str[1]+60*str[2]+str[3]
}
totalTime <-
toSec(ff@description$`$ETIM`)-toSec(ff@description$`$BTIM`)
time <-
matrix(exprs(ff)[,"Time"]*totalTime/max(exprs(ff)[,"Time"]),
ncol=1,dimnames = list(NULL,"Time"))
exprs(ff)[,"Time"] <- time

ff_t <- transform(ff,transformList(colnames(ff[,toTransform]),logicleTransform()))

# Split the data in bins
nIntervals <- round(totalTime/binTime)
cuts <- cut(exprs(ff_t)[,"Time"],breaks=nIntervals)
splitted <- split(as.data.frame(exprs(ff_t)),cuts)
timePoints <- as.numeric(gsub("\\(","",gsub(",.*","",names(splitted))))

# Logical vector to indicate good cells
selected <- rep(TRUE,nrow(ff))

counts <- sapply(splitted,function(flowDataFrame){dim(flowDataFrame)[1]})
if(showPlot){
plot(cbind(timePoints, counts),ylim=c(0,countMax))
}
if(max(counts)>countMax){warning(paste0("Counts in ",file," higher than ",countMax,"!"))}

# Counts over time
if(useCountLower | useCountUpper){
sampleMedian <- median(counts)
if(showPlot){
if(useCountLower) abline(h=sampleMedian-countThreshold,col="#FF0000",lwd=2)
if(useCountUpper) abline(h=sampleMedian+countThreshold,col="#FF0000",lwd=2)
}
selectedBins <- names(which((counts > sampleMedian-countThreshold) &
(counts < sampleMedian+countThreshold)))
selected <- selected & (cuts %in% selectedBins)
message(paste0(table(selected)["FALSE"]," cells removed due to counts"))
}

if(!is.null(timeMin)){
if(showPlot) abline(v=timeMin,col="#FF0000",lwd=2)
selected <- selected & (ff[,"Time"] > timeMin)
message(paste0(table((ff[,"Time"] > timeMin))["FALSE"]," cells removed due to time min"))
}

if(!is.null(timeMax)){
if(showPlot) abline(v=timeMax,col="#FF0000",lwd=2)
selected <- selected & (ff[,"Time"] < timeMax)
message(paste0(table((ff[,"Time"] < timeMax))["FALSE"]," cells removed due to time max"))
}

toRemove <- removeMargins(ff,checkMargins)
selected <- selected & toRemove
message(paste0(table(toRemove)["FALSE"]," cells removed due to removeMargins"))

ff_new <- cbind2(ff,matrix(as.numeric(selected),dimnames = list(NULL,"good")))
if(writeOutput){
suppressWarnings(dir.create(file.path(dir,outputDir)))
#write.table(selected,file=file.path(dir,"QC",paste0("QC_",file,".csv")),row.names = FALSE,col.names = c("Selected"))

suppressWarnings(write.FCS(ff_new, file.path(dir, outputDir, gsub(".fcs","_good.fcs",file))))
}

if(showPlot){

markers <- ff@parameters@data[,"desc"][markersToPlot]
markers[is.na(markers)] <- ff@parameters@data[,"name"][markersToPlot[is.na(markers)]]
names(markers) <- colnames(ff)[markersToPlot]

# Plot marker intensities over Time
for(marker in c(names(markers))){
medians <- sapply(splitted,function(flowDataFrame){
median(flowDataFrame[,marker])
})
qtl_25 <- sapply(splitted,function(flowDataFrame){
quantile(flowDataFrame[,marker],c(.25))
})
qtl_75 <- sapply(splitted,function(flowDataFrame){
quantile(flowDataFrame[,marker],c(.75))
})
if(marker=="FSC-A" || marker=="SSC-A"){
plot(exprs(ff_t)[,c("Time",marker)],
pch=".",col=c("#FF0000","#00000055")[selected+1],
#ylim=c(min,max),#axes=FALSE,
main=paste0(markers[marker]," (",gsub("Comp-","",gsub("-A$","",marker)),")"))
} else {
plot(exprs(ff_t)[,c("Time",marker)],
pch=".",col=c("#FF0000","#00000055")[selected+1],
ylim=c(-2,4),#axes=FALSE,
main=paste0(markers[marker]," (",gsub("Comp-","",gsub("-A$","",marker)),")"))
}
points(cbind(timePoints,medians),
pch="o",col="#3498db")
points(cbind(timePoints,qtl_25),
pch="o",col="#a6bddb")
points(cbind(timePoints,qtl_75),
pch="o",col="#a6bddb")
}
}

return(ff_new)
}

dir <- "FR-FCM-ZZQY"
files <- list.files(dir,pattern="Tube_[0-9]*.fcs$")
dir.create(file.path(dir,"QC"))


compensationFile = "FR-FCM-ZZQY/attachments/CompensationFlowJo.csv"
comp <- read.csv(compensationFile,row.names=1,check.names = FALSE)
colnames(comp) <- rownames(comp) <- gsub(" ::.*","",colnames(comp))

for(file in files){
print(file)
png(file.path(dir,"QC",paste0("QC_1s_",file,".png")),width=2000,height=1000)

layout(matrix(c(1,1,1,3,4,5,
1,1,1,3,4,5,
1,1,1,6,7,8,
2,2,2,6,7,8,
2,2,2,9,10,11,
2,2,2,9,10,11), 6, 6, byrow = TRUE))
ff <- selectGoodQuality(dir=dir,file=file,comp=comp,toTransform=7:19,
countMax = 10000, countThreshold = 2000,
checkMargins = c(1:6),binTime = 1,
markersToPlot = c(4,1,10,11,12,14,15,17,18,19))
dev.off()
}
37 changes: 37 additions & 0 deletions 1_plotSettings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

cellTypeColors <- c("B cells" = "#e74c3c",
"T cells" = "#a4cc2e",
"NK cells" = "#3498db",
"NK T cells" = "#a65628",
"Macrophages" = "#ff7f00",
"DCs" = "#FECC08",
"Neutrophils" = "#9b59b6")

markerNames <- c("AmCyan-A" = "Autofluorescence",
"Comp-BV711-A" = "CD64",
"Comp-PE-Cy5-A" = "CD19",
"Comp-PerCP-Cy5-5-A" = "MHCII",
"Comp-PE-Cy7-A" = "CD11c",
"Comp-BV605-A" = "CD11b",
"Comp-Alexa Fluor 700-A" = "Ly-6G",
"Comp-APC-A"="NK1.1",
"Comp-Pacific Blue-A"="CD49b",
"Comp-PE-A"="CD3",
"Comp-BV786-A"="FcERI")
markerColors <- c("AmCyan-A" = "#cc4c02",
"Comp-BV711-A" = "#fdae6b",
"Comp-PE-Cy5-A" = "#e41a1c",
"Comp-PerCP-Cy5-5-A" = "#ff7f00",
"Comp-PE-Cy7-A" = "#FECC08",
"Comp-BV605-A" = "#ae017e",
"Comp-Alexa Fluor 700-A" = "#9b59b6",
"Comp-APC-A"="#3498db",
"Comp-Pacific Blue-A"="#1f78b4",
"Comp-PE-A"="#a4cc2e",
"Comp-BV786-A"="#fb9a99")

circular_markerOrder = c(10,12,18,8,19,11,15,14,17)
grid_markerOrder = c(8,18,12,19,17,10,11,15,14)

save(cellTypeColors,markerColors,markerNames,circular_markerOrder,grid_markerOrder,
file="plotSettings.Rdata")
112 changes: 112 additions & 0 deletions 1_preprocessing.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@


library(flowCore)

######################
# Parameter settings #
######################

# Files to use
files = list.files(path="C:/Users/edmondsonef/Desktop/Humanized/Flow/1-05Jan2022", pattern = "Samples", ignore.case = TRUE)

# Compensation matrix
compensationFile = file.path("C:/Users/edmondsonef/Desktop/Humanized/Flow/1-05Jan2022/05Jan2022/table_compensation/autospill_compensation.csv")
colsToCompensate = c(7:17)

# GatingML file
gatingFile <- file.path(dataID,"attachments/151030_21-10-15_Tube_028.fcs_gates.xml")
# Indices of the gates of interest
gate_ids <- c( "Live single cells" = 3,
"Macrophages" = 4,
"B cells" = 6,
"T cells" = 16,
"NK cells" = 8,
"NK T cells" = 9,
"DCs" = 11,
"Neutrophils" = 13)
# Cell types to use as final labels
cellTypes <- c("B cells","T cells","NK cells","NK T cells",
"Macrophages","DCs","Neutrophils")

# Columns to use for analysis
colsToCluster = c(8,10:12,14:15,17:19)

###################
# Helper function #
###################

ProcessGatingML <- function(flowFrame,gatingFile,gateIDs,
cellTypes,silent=FALSE){
gating_xml <- XML::xmlToList(XML::xmlParse(gatingFile))
flowEnv <- new.env()
flowUtils::read.gatingML(gatingFile, flowEnv)
# A. Read the gates from xml
filterList <- list()
for(cellType in names(gateIDs)){
filterList[[cellType]] <- flowEnv[[
as.character(gating_xml[[gateIDs[cellType]]]$.attrs["id"])
]]
}
# B. Process the fcs file for all specified gates
results <- matrix(NA,nrow=nrow(flowFrame),ncol=length(gateIDs),
dimnames = list(NULL,names(gateIDs)))
for(cellType in names(gateIDs)){
if(!silent){message(paste0("Processing ",cellType))}
results[,cellType] <- flowCore::filter(flowFrame,
filterList[[cellType]])@subSet
}
# C. Assign one celltype to each cell
manual <- rep("Unknown",nrow(flowFrame))
for(celltype in cellTypes){
manual[results[,celltype]] <- celltype
}
manual <- factor(manual,levels = c("Unknown",cellTypes))

list("matrix"=results,"manual"=manual)
}

########################
# Preprocess the data #
########################

# Read the compensation matrix
comp <- read.csv(compensationFile,row.names=1,check.names = FALSE)
colnames(comp) <- rownames(comp) <- gsub(" ::.*","",colnames(comp))

for(file in files){
message(paste0("Processing ",file))
# Load the raw data
ff <- read.FCS(file)
# and compensate
ff <- compensate(ff,comp)
ff@description$SPILL <- comp
colnames(ff)[colsToCompensate] <- paste("Comp-",
colnames(ff)[colsToCompensate],sep="")

# Extract manual gating
gatingRes <- ProcessGatingML(ff, gatingFile, gate_ids, cellTypes)
gatingMatrix <- gatingRes$matrix
manual <- gatingRes$manual

# Cells to use as input for the algorithms
selected <- gatingMatrix[,"Live single cells"]

# Save selected cells to fcs file for algorithms which can only take
# a file as input. File is already compensated, so identity matrix as
# compensation matrix
new_comp <- diag(length(colnames(ff)[7:19]))
colnames(new_comp) <- colnames(ff)[7:19]
ff@description$SPILL <- new_comp
write.FCS(ff[selected,],
file=gsub(".fcs","_selected.fcs",file))
# Save also a subset of only the first 10.000 cells for faster processing
write.FCS(ff[selected,][1:10000,],
file=gsub(".fcs","_selected_10000.fcs",file))

# Transform the data
ff_t <- transform(ff,transformList(colnames(ff)[7:19],logicleTransform()))

# Save results so this step can be skipped next time
save(ff,ff_t,selected,manual,gatingMatrix,colsToCluster,
file=gsub(".fcs",".Rdata",file))
}
Loading

0 comments on commit 484e5a5

Please sign in to comment.