Analysis and visualization R scripts of my research 「Genome-wide profiling of circular RNAs in the hybridization of two elite inbred lines of Gossypium hirsutum」
Relationships of expressed circRNA in fiber leaf and ovule
- The upset plot was generated by TBtools.
- The histogram representing the size of the each set is transformed into a stack barplot produced by ggplot.
- The origination of circRNA:
pie diagram
. - CircRNA length density:
ggplot2::geom_density
. - CircRNA distribution on upland cotton chromosome:
ggplot2::geom_bar
- All circRNA expression profile:
pheatmap
- data:
log10(TPM+1)
- scale:
z-score by "row"
- data:
Tissue specific of circRNA expression
- Method:
WGCNA
- Input data:
readcount of junction_site
- Normalization method:
varianceStabilizingTransformation
- Considering
datExpr
included informations from different tissue, sample heterogeneity is inevitable, but our goal is to classify genes into different tissue, and we are not interested with "hub genes",so there is no need to demand the network should bescale-free
. - However, the final result demonstrate that we did get the appropriate power, and GS and MM also showed a significant correlation。
- GO enrichment results from TBtools.
Method:
## 2. function to judge expressed or not with cutoff cpm < 0 & cpm > 1 in at least two of three samples of each cultivar
AddTags = function(cpm){
for (i in 1:nrow(cpm)) {
## Hybrid
if (sum(cpm[i,1:3] > 1) > 1) {
cpm[i,10] <- "Ture"
}else if (sum(cpm[i,1:3] < ) > 1) {
cpm[i,10] <- "False"
}else {
cpm[i,10] <- "None"
}
## Maternal
if (sum(cpm[i,4:6] > 1) > 1) {
cpm[i,11] <- "Ture"
}else if (sum(cpm[i,4:6] < ) > 1) {
cpm[i,11] <- "False"
}else {
cpm[i,11] <- "None"
}
## Paternal
if (sum(cpm[i,7:9] > 1) > 1) {
cpm[i,12] <- "Ture"
}else if (sum(cpm[i,7:9] < ) > 1) {
cpm[i,12] <- "False"
}else {
cpm[i,12] <- "None"
}
}
return(cpm)
}
## 3. based on the sample tag, divide the circRNAs into SPE and NAE categories.
SPEandNAEfilter = function(cpm){
## 01. add ture or false tag
cpm = AddTags(cpm)
colnames(cpm)[(ncol(cpm)-2):ncol(cpm)] = c("H.tag","M.tag","P.tag") # change column names
## 02. add tags for each circRNA if it belongs to SPE.
cpm[,13] = case_when(
cpm$M.tag == "Ture" & cpm$P.tag == "False" ~ "SPE-Maternal",
cpm$M.tag == "False" & cpm$P.tag == "Ture" ~ "SPE-Paternal"
)
## 03. same as SPE for NAE
cpm[,14] = case_when(
cpm$M.tag == "False" & cpm$P.tag == "False" & cpm$H.tag == "Ture" ~ "NAE-Hybrid",
cpm$M.tag == "Ture" & cpm$P.tag == "Ture" & cpm$H.tag == "False" ~ "NAE-Parent",
cpm$M.tag == "False" & cpm$P.tag == "Ture" & cpm$H.tag == "False" ~ "NAE-Parent",
cpm$M.tag == "Ture" & cpm$P.tag == "False" & cpm$H.tag == "False" ~ "NAE-Parent",
)
## finish work, add rownames and circID tag
colnames(cpm)[(ncol(cpm)-1):ncol(cpm)] = c("SPE","NAE")
cpm = data.frame(row.names = rawcount$circID,
circID = rawcount$circID,
cpm)
return(cpm)
}
Figure 2
- UpsetPlot:
TBtools
- Heatmap:
pheatmap
- Half-box-half-jitter:
ggpol::geom_boxjitter
- Gene Structure and CircRNA Structure:
ggplot2::geom_segment
- qRT-PCR result boxplot:
ggplot2::geom_boxplot
Expression Pattern from Guanjing Hu-Wendellab
classDominance<-function(TvsMid, TvsP1, TvsP2, P1vsP2, log2fc.threshold=0, reverseTvsP =FALSE, Pnames=c("Maternal","Paternal"))
{
# Hybrid/polyploid vs Mid parental val
TvsMid <- data.frame(TvsMid[,c("log2FoldChange", "lfcSE", "pvalue")])
names(TvsMid) <- c("TvsMid", "TvsMid.SE", "TvsMid.pvalue")
# Hybrid/polyploid vs Parent 1
TvsP1 <- data.frame(TvsP1[,c("log2FoldChange", "lfcSE", "pvalue")])
names(TvsP1) <- c("TvsP1", "TvsP1.SE", "TvsP1.pvalue")
# Hybrid/polyploid vs Parent 2
TvsP2 <- data.frame(TvsP2[,c("log2FoldChange", "lfcSE", "pvalue")])
names(TvsP2) <- c("TvsP2", "TvsP2.SE", "TvsP2.pvalue")
# Parent 1 vs Parent 2
P1vsP2 <- data.frame(P1vsP2[,c("log2FoldChange", "lfcSE", "pvalue")])
names(P1vsP2) <- c("P1vsP2", "P1vsP2.SE", "P1vsP2.pvalue")
if(reverseTvsP==TRUE){
TvsP1$TvsP1 = -TvsP1$TvsP1
TvsP2$TvsP2 = -TvsP2$TvsP2
}
tbl = cbind(TvsMid, TvsP1, TvsP2, P1vsP2)
tbl$TvsMid[is.na(tbl$TvsMid)] =0
tbl$TvsP1[is.na(tbl$TvsP1)] =0
tbl$TvsP2[is.na(tbl$TvsP2)] =0
tbl$P1vsP2[is.na(tbl$P1vsP2)] =0
# judge
tbl$additivity <- ifelse(abs(tbl$TvsMid)>log2fc.threshold & tbl$TvsMid.pvalue<0 & !is.na(tbl$TvsMid.pvalue), "T!=Mid", "T=Mid")
tbl$TvsP1.reg <- ifelse(abs(tbl$TvsP1)>log2fc.threshold & tbl$TvsP1.pvalue<0 & !is.na(tbl$TvsP1.pvalue), "T!=P1", "T=P1")
tbl$TvsP2.reg <- ifelse(abs(tbl$TvsP2)>log2fc.threshold & tbl$TvsP2.pvalue<0 & !is.na(tbl$TvsP2.pvalue), "T!=P2", "T=P2")
tbl$P1vsP2.reg <- ifelse(abs(tbl$P1vsP2)>log2fc.threshold & tbl$P1vsP2.pvalue<0 & !is.na(tbl$P1vsP2.pvalue), ifelse(tbl$P1vsP2>log2fc.threshold & tbl$P1vsP2.pvalue<0 & !is.na(tbl$P1vsP2.pvalue), "P1>P2","P1<P2"), "P1=P2")
# together
tbl$class <- paste(tbl$P1vsP2.reg, tbl$additivity, tbl$TvsP1.reg, tbl$TvsP2.reg, sep=";")
# assign category
tbl$category = "7.Other"
tbl$category[grep("T=Mid",tbl$class)] = "1.Additivity" # hybrid=Mid while P1!=P2
tbl$category[grep("P1=P2;T=Mid",tbl$class)] = "6.Conserved" # P1=P2=Mid=hybrid
tbl$category[grep("P1>P2;T!=Mid;T=P1;T!=P2",tbl$class)] = paste0("2.",Pnames[1],"-dominant, higher")
tbl$category[grep("P1<P2;T!=Mid;T=P1;T!=P2",tbl$class)] = paste0("2.",Pnames[1],"-dominant, lower")
tbl$category[grep("P1>P2;T!=Mid;T!=P1;T=P2",tbl$class)] = paste0("3.",Pnames[2],"-dominant, lower")
tbl$category[grep("P1<P2;T!=Mid;T!=P1;T=P2",tbl$class)] = paste0("3.",Pnames[2],"-dominant, higher")
tbl$category[grepl("T!=Mid;T!=P1;T!=P2",tbl$class) & tbl$TvsP1>0 & tbl$TvsP2>0 & tbl$P1vsP2>0] = paste0("4.Transgressive Up: ",Pnames[1]," higher")
tbl$category[grepl("T!=Mid;T!=P1;T!=P2",tbl$class) & tbl$TvsP1>0 & tbl$TvsP2>0 & tbl$P1vsP2<0] = paste0("4.Transgressive Up: ",Pnames[2]," higher")
tbl$category[grepl("T!=Mid;T!=P1;T!=P2",tbl$class) & tbl$TvsP1<0 & tbl$TvsP2<0 & tbl$P1vsP2>0] = paste0("5.Transgressive Down: ",Pnames[1]," higher")
tbl$category[grepl("T!=Mid;T!=P1;T!=P2",tbl$class) & tbl$TvsP1<0 & tbl$TvsP2<0 & tbl$P1vsP2<0] = paste0("5.Transgressive Down: ",Pnames[2]," higher")
return(tbl)
}
Notice: It seems Transgressive up/down p1=p2 was missing. but it has been included. because when judge Transgress pattern
The rank of P1 and P2 is only judged by the TPM value, but not corrected by pvalue or log2fc.
- FourQuadrantChart:
ggplot2::gemo_point
non-additive circRNAs were labeled.
From: Past, present, and future of circRNAs by Ines Lucia Patop, Stas Wüst and Sebastian Kadener
miRNA was the bridge of the network, connected the circRNA and mRNA
- Circos plot:
circlize
- ceRNA network:
gephi
- Protein-protein interaction: STRING