Skip to content

Latest commit

 

History

History
1261 lines (1085 loc) · 41.2 KB

Fig1D_TAD_Overlap.md

File metadata and controls

1261 lines (1085 loc) · 41.2 KB
## Identify TADs and boundary regions
# Import the data
Zero <- read.table("Data/Interactions/TADs//D0.DI", quote="\"", comment.char="", skip=1)

## Call TADs on sDI
# Convert sDI to z-scores
Mean <- mean(Zero$V4)
SD <- sd(Zero$V4)
Zero$V4 <- (Zero$V4 - Mean)/SD
rm(Mean)
rm(SD)

All <- data.frame()
Bounds <- data.frame()
Chromosomes <- c(paste("chr",seq(1,19,by=1),sep=""),"chrX")
for (Chr in Chromosomes) {
  # Subset to a single chromosome
  Subset <- Zero[ Zero$V1 == Chr,]
  
  # Identify transition points (between positive and negative bias)
  Subset$Transition <- 0
  for (i in 1:(nrow(Subset)-1)) {
    SignCurr <- sign(Subset[i,4])
    SignNext <- sign(Subset[(i+1),4])
    if (SignCurr != SignNext) { Subset[c(i),"Transition"] <- 1}
  }
  
  # Identify each region of bias
  Region <- 1
  Subset$Region <- 0
  for (i in 1:nrow(Subset)) {
    Subset[i,"Region"] <- Region
    if (Subset[i,"Transition"] == 1) { Region <- Region + 1}
  }
  
  # Calculate the length of each region
  Subset$Count <- 1
  Counts <- aggregate(Subset$Count, by=list(Subset$Region), FUN="sum")
  colnames(Counts) <- c("Region","Length")
  Subset <- merge(Subset, Counts, by="Region")
  
  # Identify regions longer or equal to 10 bins
  Subset$Long <- 0
  Subset[ Subset$Length >= 10,"Long"] <- 1
  
  # Mark each region for sufficient bias and set the sign
  Subset$Bias <- 0
  Subset$Sign <- 0
  for (i in unique(Subset$Region)) {
    Tmp <- Subset[ Subset$Region == i,]
    if (max(abs(Tmp$V4)) >= 0.5) {
      Subset[ Subset$Region == i, "Bias"] <- 1
      Subset[ Subset$Region == i, "Sign"] <- sign(max(Tmp$V4))
    }
  }
  
  # Remove regions that are too short (< 10 bins = 50kb)
  Subset[ Subset$Length <= 10, "Bias"] <- 0
  
  # Identify biased regions
  PositiveRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == 1, "Region"])
  NegativeRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == -1, "Region"])
  
  ## Identify type 1 boundaries (left > right)
  # For each negative region, get the closest positive region
  T1 <- data.frame()
  for (i in 1:length(NegativeRegions)) {
    if (length(PositiveRegions[ PositiveRegions >= NegativeRegions[i]]) > 0) {
      T1[i,1] <- NegativeRegions[i]
      T1[i,2] <- min(PositiveRegions[ PositiveRegions >= NegativeRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T1 <- T1[ order(T1[,2], -T1[,1]),]
  T1 <- T1[ duplicated(T1[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T1$Keep <- 0
  T1[ T1[,2] == (T1[,1] + 1),"Keep"] <- 1
  Proceeding <- T1[ T1$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT1 <- T1[ T1$Keep == 0,]
  for (i in 1:nrow(GappedT1)) {
    Test <- Subset[ Subset$Region < GappedT1[i,2] & Subset$Region > GappedT1[i,1],]
    if (nrow(Test) <= 8) { GappedT1[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT1[i,"Keep"] <- 0 }
    if (GappedT1[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT1[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT1[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT1[ GappedT1$Keep == 1,]) > 0) {
  GappedT1Keep <- GappedT1[ GappedT1$Keep == 1, c(4,5,3)]
  colnames(GappedT1Keep) <- colnames(Proceeding)
  T1 <- rbind(GappedT1Keep, Proceeding)
  }
  T1 <- T1[ order(T1[,1]),]
  
  # Identify type 2 boundaries (right > left)
  T2 <- data.frame()
  for (i in 1:length(PositiveRegions)) {
    if (length(NegativeRegions[ NegativeRegions >= PositiveRegions[i]]) > 0) {
      T2[i,1] <- PositiveRegions[i]
      T2[i,2] <- min(NegativeRegions[ NegativeRegions >= PositiveRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T2 <- T2[ order(T2[,2], -T2[,1]),]
  T2 <- T2[ duplicated(T2[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T2$Keep <- 0
  T2[ T2[,2] == (T2[,1] + 1),"Keep"] <- 1
  Proceeding <- T2[ T2$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT2 <- T2[ T2$Keep == 0,]
  for (i in 1:nrow(GappedT2)) {
    Test <- Subset[ Subset$Region < GappedT2[i,2] & Subset$Region > GappedT2[i,1],]
    if (nrow(Test) <= 8) { GappedT2[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT2[i,"Keep"] <- 0 }
    if (GappedT2[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT2[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT2[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT2[ GappedT2$Keep == 1,]) > 0) {
    GappedT2Keep <- GappedT2[ GappedT2$Keep == 1, c(4,5,3)]
    colnames(GappedT2Keep) <- colnames(Proceeding)
    T2 <- rbind(GappedT2Keep, Proceeding)
  }
  T2 <- T2[ order(T2[,1]),]
  
  # Get non-boundary biased regions
  PositiveNonbias <- data.frame(Region = PositiveRegions[ !(PositiveRegions %in% T1[,2]) & !(PositiveRegions %in% GappedT1[ GappedT1$Keep == 1,2])])
  NegativeNonbias <- data.frame(Region = NegativeRegions[ !(NegativeRegions %in% T2[,2]) & !(NegativeRegions %in% GappedT2[ GappedT2$Keep == 1,2])])
  
  # Stich together T1 regions
  TADs <- data.frame()
  for (i in 1:(nrow(T1)-1)) {
    TADs[i,1] <- T1[i,2]
    TADs[i,2] <- T1[(i+1),2]
  }
  
  # Remove TADs that doesnt cross a T2 boundary or multiple
  TADs$Keep <- 0
  TADs$T2 <- 0
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,2]
    Tmp <- data.frame(T2 = T2[ T2[,2] >= Min & T2[,2] <= Max,])
    if (nrow(Tmp) == 1) { 
      TADs[i,"Keep"] <- 1
      TADs[i,"T2"] <- Tmp[1,1]
    }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have negative bias between first T1 and T2 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,4]
    Tmp <- data.frame(NegativeNonbias = NegativeNonbias[ NegativeNonbias > Min & NegativeNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have positive bias between T2 and second T1 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,4]
    Max <- TADs[i,2]
    Tmp <- data.frame(PositiveNonbias = PositiveNonbias[ PositiveNonbias > Min & PositiveNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate TAD boundaries
  TADs$chr <- as.character(Chr)
  for (i in 1:nrow(TADs)) {
    Tmp <- Subset[ Subset$Region == TADs[i,1],]
    TADs[i,"start"] <- Tmp[ which.max(Tmp[,5]),3]
    Tmp <- Subset[ Subset$Region == (TADs[i,2]-1),]
    TADs[i,"end"] <- Tmp[ which.min(Tmp[,5]),3]+50000
  }
  
  # Filter TADs in low coverage areas (+/- 20 bins)
  Subset$Forward <- Subset[2,3] - Subset[1,3]
  for (nr in 2:nrow(Subset)) { Subset[nr,"Forward"] <- Subset[nr,3] - Subset[(nr-1),3] }
  
  for (m in 1:nrow(TADs)) {
    Start <- as.numeric(TADs[m,6])-50000
    End <- as.numeric(TADs[m,7])+50000
    Tmp <- Subset
    Tmp <- Tmp[ Tmp[,3] >= Start & Tmp[,3] < End,]
    if (max(Tmp$Forward) > 5000) { TADs[m,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate T1 boundaries
  T1$chr <- as.character(Chr)
  for (i in 1:nrow(T1)) {
    Tmp <- Subset[ Subset$Region == T1[i,2],]
    T1[i,"boundary"] <- min(Tmp[,3])
  }
  
  # Finalize TADs
  TADs <- TADs[,c(5,6,7)]
  
  # Combine all TADs
  All <- rbind(All, TADs)
  
  # Save T1 Boundaries
  T1 <- T1[,c(4,5)]
  Bounds <- rbind(Bounds, T1)
}

# Rename the TAD calls
Zero_TADs <- All
Zero_Bounds <- Bounds

## Call TADs on 4 hour data
# Import the data
Zero <- read.table("Data/Interactions/TADs/H4.DI", quote="\"", comment.char="", skip=1)

## Call TADs on sDI
# Convert sDI to z-scores
Mean <- mean(Zero$V4)
SD <- sd(Zero$V4)
Zero$V4 <- (Zero$V4 - Mean)/SD
rm(Mean)
rm(SD)

Bounds <- data.frame()
All <- data.frame()
Chromosomes <- c(paste("chr",seq(1,19,by=1),sep=""),"chrX")
for (Chr in Chromosomes) {
  # Subset to a single chromosome
  Subset <- Zero[ Zero$V1 == Chr,]
  
  # Identify transition points (between positive and negative bias)
  Subset$Transition <- 0
  for (i in 1:(nrow(Subset)-1)) {
    SignCurr <- sign(Subset[i,4])
    SignNext <- sign(Subset[(i+1),4])
    if (SignCurr != SignNext) { Subset[c(i),"Transition"] <- 1}
  }
  
  # Identify each region of bias
  Region <- 1
  Subset$Region <- 0
  for (i in 1:nrow(Subset)) {
    Subset[i,"Region"] <- Region
    if (Subset[i,"Transition"] == 1) { Region <- Region + 1}
  }
  
  # Calculate the length of each region
  Subset$Count <- 1
  Counts <- aggregate(Subset$Count, by=list(Subset$Region), FUN="sum")
  colnames(Counts) <- c("Region","Length")
  Subset <- merge(Subset, Counts, by="Region")
  
  # Identify regions longer or equal to 10 bins
  Subset$Long <- 0
  Subset[ Subset$Length >= 10,"Long"] <- 1
  
  # Mark each region for sufficient bias and set the sign
  Subset$Bias <- 0
  Subset$Sign <- 0
  for (i in unique(Subset$Region)) {
    Tmp <- Subset[ Subset$Region == i,]
    if (max(abs(Tmp$V4)) >= 0.5) {
      Subset[ Subset$Region == i, "Bias"] <- 1
      Subset[ Subset$Region == i, "Sign"] <- sign(max(Tmp$V4))
    }
  }
  
  # Remove regions that are too short (< 10 bins = 50kb)
  Subset[ Subset$Length <= 10, "Bias"] <- 0
  
  # Identify biased regions
  PositiveRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == 1, "Region"])
  NegativeRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == -1, "Region"])
  
  ## Identify type 1 boundaries (left > right)
  # For each negative region, get the closest positive region
  T1 <- data.frame()
  for (i in 1:length(NegativeRegions)) {
    if (length(PositiveRegions[ PositiveRegions >= NegativeRegions[i]]) > 0) {
      T1[i,1] <- NegativeRegions[i]
      T1[i,2] <- min(PositiveRegions[ PositiveRegions >= NegativeRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T1 <- T1[ order(T1[,2], -T1[,1]),]
  T1 <- T1[ duplicated(T1[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T1$Keep <- 0
  T1[ T1[,2] == (T1[,1] + 1),"Keep"] <- 1
  Proceeding <- T1[ T1$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT1 <- T1[ T1$Keep == 0,]
  for (i in 1:nrow(GappedT1)) {
    Test <- Subset[ Subset$Region < GappedT1[i,2] & Subset$Region > GappedT1[i,1],]
    if (nrow(Test) <= 8) { GappedT1[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT1[i,"Keep"] <- 0 }
    if (GappedT1[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT1[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT1[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT1[ GappedT1$Keep == 1,]) > 0) {
    GappedT1Keep <- GappedT1[ GappedT1$Keep == 1, c(4,5,3)]
    colnames(GappedT1Keep) <- colnames(Proceeding)
    T1 <- rbind(GappedT1Keep, Proceeding)
  }
  T1 <- T1[ order(T1[,1]),]
  
  # Identify type 2 boundaries (right > left)
  T2 <- data.frame()
  for (i in 1:length(PositiveRegions)) {
    if (length(NegativeRegions[ NegativeRegions >= PositiveRegions[i]]) > 0) {
      T2[i,1] <- PositiveRegions[i]
      T2[i,2] <- min(NegativeRegions[ NegativeRegions >= PositiveRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T2 <- T2[ order(T2[,2], -T2[,1]),]
  T2 <- T2[ duplicated(T2[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T2$Keep <- 0
  T2[ T2[,2] == (T2[,1] + 1),"Keep"] <- 1
  Proceeding <- T2[ T2$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT2 <- T2[ T2$Keep == 0,]
  for (i in 1:nrow(GappedT2)) {
    Test <- Subset[ Subset$Region < GappedT2[i,2] & Subset$Region > GappedT2[i,1],]
    if (nrow(Test) <= 8) { GappedT2[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT2[i,"Keep"] <- 0 }
    if (GappedT2[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT2[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT2[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT2[ GappedT2$Keep == 1,]) > 0) {
    GappedT2Keep <- GappedT2[ GappedT2$Keep == 1, c(4,5,3)]
    colnames(GappedT2Keep) <- colnames(Proceeding)
    T2 <- rbind(GappedT2Keep, Proceeding)
  }
  T2 <- T2[ order(T2[,1]),]
  
  # Get non-boundary biased regions
  PositiveNonbias <- data.frame(Region = PositiveRegions[ !(PositiveRegions %in% T1[,2]) & !(PositiveRegions %in% GappedT1[ GappedT1$Keep == 1,2])])
  NegativeNonbias <- data.frame(Region = NegativeRegions[ !(NegativeRegions %in% T2[,2]) & !(NegativeRegions %in% GappedT2[ GappedT2$Keep == 1,2])])
  
  # Stich together T1 regions
  TADs <- data.frame()
  for (i in 1:(nrow(T1)-1)) {
    TADs[i,1] <- T1[i,2]
    TADs[i,2] <- T1[(i+1),2]
  }
  
  # Remove TADs that doesnt cross a T2 boundary or multiple
  TADs$Keep <- 0
  TADs$T2 <- 0
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,2]
    Tmp <- data.frame(T2 = T2[ T2[,2] >= Min & T2[,2] <= Max,])
    if (nrow(Tmp) == 1) { 
      TADs[i,"Keep"] <- 1
      TADs[i,"T2"] <- Tmp[1,1]
    }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have negative bias between first T1 and T2 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,4]
    Tmp <- data.frame(NegativeNonbias = NegativeNonbias[ NegativeNonbias > Min & NegativeNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have positive bias between T2 and second T1 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,4]
    Max <- TADs[i,2]
    Tmp <- data.frame(PositiveNonbias = PositiveNonbias[ PositiveNonbias > Min & PositiveNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate TAD boundaries
  TADs$chr <- as.character(Chr)
  for (i in 1:nrow(TADs)) {
    Tmp <- Subset[ Subset$Region == TADs[i,1],]
    TADs[i,"start"] <- Tmp[ which.max(Tmp[,5]),3]
    Tmp <- Subset[ Subset$Region == (TADs[i,2]-1),]
    TADs[i,"end"] <- Tmp[ which.min(Tmp[,5]),3]+50000
  }
  
  # Filter TADs in low coverage areas (+/- 20 bins)
  Subset$Forward <- Subset[2,3] - Subset[1,3]
  for (nr in 2:nrow(Subset)) { Subset[nr,"Forward"] <- Subset[nr,3] - Subset[(nr-1),3] }
  
  for (m in 1:nrow(TADs)) {
    Start <- as.numeric(TADs[m,6])-50000
    End <- as.numeric(TADs[m,7])+50000
    Tmp <- Subset
    Tmp <- Tmp[ Tmp[,3] >= Start & Tmp[,3] < End,]
    if (max(Tmp$Forward) > 5000) { TADs[m,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate T1 boundaries
  T1$chr <- as.character(Chr)
  for (i in 1:nrow(T1)) {
    Tmp <- Subset[ Subset$Region == T1[i,2],]
    T1[i,"boundary"] <- min(Tmp[,3])
  }
  
  # Finalize TADs
  TADs <- TADs[,c(5,6,7)]
  
  # Combine all TADs
  All <- rbind(All, TADs)
  
  # Save T1 Boundaries
  T1 <- T1[,c(4,5)]
  Bounds <- rbind(Bounds, T1)
}

# Rename the TAD calls
Four_TADs <- All
Four_Bounds <- Bounds

## Call TADs on D2 data
# Import the data
Zero <- read.table("Data/Interactions/TADs/D2.DI", quote="\"", comment.char="", skip=1)

## Call TADs on sDI
# Convert sDI to z-scores
Mean <- mean(Zero$V4)
SD <- sd(Zero$V4)
Zero$V4 <- (Zero$V4 - Mean)/SD
rm(Mean)
rm(SD)

Bounds <- data.frame()
All <- data.frame()
Chromosomes <- c(paste("chr",seq(1,19,by=1),sep=""),"chrX")
for (Chr in Chromosomes) {
  # Subset to a single chromosome
  Subset <- Zero[ Zero$V1 == Chr,]
  
  # Identify transition points (between positive and negative bias)
  Subset$Transition <- 0
  for (i in 1:(nrow(Subset)-1)) {
    SignCurr <- sign(Subset[i,4])
    SignNext <- sign(Subset[(i+1),4])
    if (SignCurr != SignNext) { Subset[c(i),"Transition"] <- 1}
  }
  
  # Identify each region of bias
  Region <- 1
  Subset$Region <- 0
  for (i in 1:nrow(Subset)) {
    Subset[i,"Region"] <- Region
    if (Subset[i,"Transition"] == 1) { Region <- Region + 1}
  }
  
  # Calculate the length of each region
  Subset$Count <- 1
  Counts <- aggregate(Subset$Count, by=list(Subset$Region), FUN="sum")
  colnames(Counts) <- c("Region","Length")
  Subset <- merge(Subset, Counts, by="Region")
  
  # Identify regions longer or equal to 10 bins
  Subset$Long <- 0
  Subset[ Subset$Length >= 10,"Long"] <- 1
  
  # Mark each region for sufficient bias and set the sign
  Subset$Bias <- 0
  Subset$Sign <- 0
  for (i in unique(Subset$Region)) {
    Tmp <- Subset[ Subset$Region == i,]
    if (max(abs(Tmp$V4)) >= 0.5) {
      Subset[ Subset$Region == i, "Bias"] <- 1
      Subset[ Subset$Region == i, "Sign"] <- sign(max(Tmp$V4))
    }
  }
  
  # Remove regions that are too short (< 10 bins = 50kb)
  Subset[ Subset$Length <= 10, "Bias"] <- 0
  
  # Identify biased regions
  PositiveRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == 1, "Region"])
  NegativeRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == -1, "Region"])
  
  ## Identify type 1 boundaries (left > right)
  # For each negative region, get the closest positive region
  T1 <- data.frame()
  for (i in 1:length(NegativeRegions)) {
    if (length(PositiveRegions[ PositiveRegions >= NegativeRegions[i]]) > 0) {
      T1[i,1] <- NegativeRegions[i]
      T1[i,2] <- min(PositiveRegions[ PositiveRegions >= NegativeRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T1 <- T1[ order(T1[,2], -T1[,1]),]
  T1 <- T1[ duplicated(T1[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T1$Keep <- 0
  T1[ T1[,2] == (T1[,1] + 1),"Keep"] <- 1
  Proceeding <- T1[ T1$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT1 <- T1[ T1$Keep == 0,]
  for (i in 1:nrow(GappedT1)) {
    Test <- Subset[ Subset$Region < GappedT1[i,2] & Subset$Region > GappedT1[i,1],]
    if (nrow(Test) <= 8) { GappedT1[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT1[i,"Keep"] <- 0 }
    if (GappedT1[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT1[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT1[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT1[ GappedT1$Keep == 1,]) > 0) {
    GappedT1Keep <- GappedT1[ GappedT1$Keep == 1, c(4,5,3)]
    colnames(GappedT1Keep) <- colnames(Proceeding)
    T1 <- rbind(GappedT1Keep, Proceeding)
  }
  T1 <- T1[ order(T1[,1]),]
  
  # Identify type 2 boundaries (right > left)
  T2 <- data.frame()
  for (i in 1:length(PositiveRegions)) {
    if (length(NegativeRegions[ NegativeRegions >= PositiveRegions[i]]) > 0) {
      T2[i,1] <- PositiveRegions[i]
      T2[i,2] <- min(NegativeRegions[ NegativeRegions >= PositiveRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T2 <- T2[ order(T2[,2], -T2[,1]),]
  T2 <- T2[ duplicated(T2[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T2$Keep <- 0
  T2[ T2[,2] == (T2[,1] + 1),"Keep"] <- 1
  Proceeding <- T2[ T2$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT2 <- T2[ T2$Keep == 0,]
  for (i in 1:nrow(GappedT2)) {
    Test <- Subset[ Subset$Region < GappedT2[i,2] & Subset$Region > GappedT2[i,1],]
    if (nrow(Test) <= 8) { GappedT2[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT2[i,"Keep"] <- 0 }
    if (GappedT2[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT2[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT2[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT2[ GappedT2$Keep == 1,]) > 0) {
    GappedT2Keep <- GappedT2[ GappedT2$Keep == 1, c(4,5,3)]
    colnames(GappedT2Keep) <- colnames(Proceeding)
    T2 <- rbind(GappedT2Keep, Proceeding)
  }
  T2 <- T2[ order(T2[,1]),]
  
  # Get non-boundary biased regions
  PositiveNonbias <- data.frame(Region = PositiveRegions[ !(PositiveRegions %in% T1[,2]) & !(PositiveRegions %in% GappedT1[ GappedT1$Keep == 1,2])])
  NegativeNonbias <- data.frame(Region = NegativeRegions[ !(NegativeRegions %in% T2[,2]) & !(NegativeRegions %in% GappedT2[ GappedT2$Keep == 1,2])])
  
  # Stich together T1 regions
  TADs <- data.frame()
  for (i in 1:(nrow(T1)-1)) {
    TADs[i,1] <- T1[i,2]
    TADs[i,2] <- T1[(i+1),2]
  }
  
  # Remove TADs that doesnt cross a T2 boundary or multiple
  TADs$Keep <- 0
  TADs$T2 <- 0
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,2]
    Tmp <- data.frame(T2 = T2[ T2[,2] >= Min & T2[,2] <= Max,])
    if (nrow(Tmp) == 1) { 
      TADs[i,"Keep"] <- 1
      TADs[i,"T2"] <- Tmp[1,1]
    }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have negative bias between first T1 and T2 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,4]
    Tmp <- data.frame(NegativeNonbias = NegativeNonbias[ NegativeNonbias > Min & NegativeNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have positive bias between T2 and second T1 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,4]
    Max <- TADs[i,2]
    Tmp <- data.frame(PositiveNonbias = PositiveNonbias[ PositiveNonbias > Min & PositiveNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate TAD boundaries
  TADs$chr <- as.character(Chr)
  for (i in 1:nrow(TADs)) {
    Tmp <- Subset[ Subset$Region == TADs[i,1],]
    TADs[i,"start"] <- Tmp[ which.max(Tmp[,5]),3]
    Tmp <- Subset[ Subset$Region == (TADs[i,2]-1),]
    TADs[i,"end"] <- Tmp[ which.min(Tmp[,5]),3]+50000
  }
  
  # Filter TADs in low coverage areas (+/- 20 bins)
  Subset$Forward <- Subset[2,3] - Subset[1,3]
  for (nr in 2:nrow(Subset)) { Subset[nr,"Forward"] <- Subset[nr,3] - Subset[(nr-1),3] }
  
  for (m in 1:nrow(TADs)) {
    Start <- as.numeric(TADs[m,6])-50000
    End <- as.numeric(TADs[m,7])+50000
    Tmp <- Subset
    Tmp <- Tmp[ Tmp[,3] >= Start & Tmp[,3] < End,]
    if (max(Tmp$Forward) > 5000) { TADs[m,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate T1 boundaries
  T1$chr <- as.character(Chr)
  for (i in 1:nrow(T1)) {
    Tmp <- Subset[ Subset$Region == T1[i,2],]
    T1[i,"boundary"] <- min(Tmp[,3])
  }
  
  # Finalize TADs
  TADs <- TADs[,c(5,6,7)]
  
  # Combine all TADs
  All <- rbind(All, TADs)
  
  # Save T1 Boundaries
  T1 <- T1[,c(4,5)]
  Bounds <- rbind(Bounds, T1)
}

# Rename the TAD calls
Two_TADs <- All
Two_Bounds <- Bounds

## Call TADs on D2 data
# Import the data
Zero <- read.table("Data/Interactions/TADs/ESC.DI", quote="\"", comment.char="", skip=1)

## Call TADs on sDI
# Convert sDI to z-scores
Mean <- mean(Zero$V4)
SD <- sd(Zero$V4)
Zero$V4 <- (Zero$V4 - Mean)/SD
rm(Mean)
rm(SD)

Bounds <- data.frame()
All <- data.frame()
Chromosomes <- c(paste("chr",seq(1,19,by=1),sep=""),"chrX")
for (Chr in Chromosomes) {
  # Subset to a single chromosome
  Subset <- Zero[ Zero$V1 == Chr,]
  
  # Identify transition points (between positive and negative bias)
  Subset$Transition <- 0
  for (i in 1:(nrow(Subset)-1)) {
    SignCurr <- sign(Subset[i,4])
    SignNext <- sign(Subset[(i+1),4])
    if (SignCurr != SignNext) { Subset[c(i),"Transition"] <- 1}
  }
  
  # Identify each region of bias
  Region <- 1
  Subset$Region <- 0
  for (i in 1:nrow(Subset)) {
    Subset[i,"Region"] <- Region
    if (Subset[i,"Transition"] == 1) { Region <- Region + 1}
  }
  
  # Calculate the length of each region
  Subset$Count <- 1
  Counts <- aggregate(Subset$Count, by=list(Subset$Region), FUN="sum")
  colnames(Counts) <- c("Region","Length")
  Subset <- merge(Subset, Counts, by="Region")
  
  # Identify regions longer or equal to 10 bins
  Subset$Long <- 0
  Subset[ Subset$Length >= 10,"Long"] <- 1
  
  # Mark each region for sufficient bias and set the sign
  Subset$Bias <- 0
  Subset$Sign <- 0
  for (i in unique(Subset$Region)) {
    Tmp <- Subset[ Subset$Region == i,]
    if (max(abs(Tmp$V4)) >= 0.5) {
      Subset[ Subset$Region == i, "Bias"] <- 1
      Subset[ Subset$Region == i, "Sign"] <- sign(max(Tmp$V4))
    }
  }
  
  # Remove regions that are too short (< 10 bins = 50kb)
  Subset[ Subset$Length <= 10, "Bias"] <- 0
  
  # Identify biased regions
  PositiveRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == 1, "Region"])
  NegativeRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == -1, "Region"])
  
  ## Identify type 1 boundaries (left > right)
  # For each negative region, get the closest positive region
  T1 <- data.frame()
  for (i in 1:length(NegativeRegions)) {
    if (length(PositiveRegions[ PositiveRegions >= NegativeRegions[i]]) > 0) {
      T1[i,1] <- NegativeRegions[i]
      T1[i,2] <- min(PositiveRegions[ PositiveRegions >= NegativeRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T1 <- T1[ order(T1[,2], -T1[,1]),]
  T1 <- T1[ duplicated(T1[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T1$Keep <- 0
  T1[ T1[,2] == (T1[,1] + 1),"Keep"] <- 1
  Proceeding <- T1[ T1$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT1 <- T1[ T1$Keep == 0,]
  for (i in 1:nrow(GappedT1)) {
    Test <- Subset[ Subset$Region < GappedT1[i,2] & Subset$Region > GappedT1[i,1],]
    if (nrow(Test) <= 8) { GappedT1[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT1[i,"Keep"] <- 0 }
    if (GappedT1[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT1[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT1[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT1[ GappedT1$Keep == 1,]) > 0) {
    GappedT1Keep <- GappedT1[ GappedT1$Keep == 1, c(4,5,3)]
    colnames(GappedT1Keep) <- colnames(Proceeding)
    T1 <- rbind(GappedT1Keep, Proceeding)
  }
  T1 <- T1[ order(T1[,1]),]
  
  # Identify type 2 boundaries (right > left)
  T2 <- data.frame()
  for (i in 1:length(PositiveRegions)) {
    if (length(NegativeRegions[ NegativeRegions >= PositiveRegions[i]]) > 0) {
      T2[i,1] <- PositiveRegions[i]
      T2[i,2] <- min(NegativeRegions[ NegativeRegions >= PositiveRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T2 <- T2[ order(T2[,2], -T2[,1]),]
  T2 <- T2[ duplicated(T2[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T2$Keep <- 0
  T2[ T2[,2] == (T2[,1] + 1),"Keep"] <- 1
  Proceeding <- T2[ T2$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT2 <- T2[ T2$Keep == 0,]
  for (i in 1:nrow(GappedT2)) {
    Test <- Subset[ Subset$Region < GappedT2[i,2] & Subset$Region > GappedT2[i,1],]
    if (nrow(Test) <= 8) { GappedT2[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT2[i,"Keep"] <- 0 }
    if (GappedT2[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT2[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT2[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT2[ GappedT2$Keep == 1,]) > 0) {
    GappedT2Keep <- GappedT2[ GappedT2$Keep == 1, c(4,5,3)]
    colnames(GappedT2Keep) <- colnames(Proceeding)
    T2 <- rbind(GappedT2Keep, Proceeding)
  }
  T2 <- T2[ order(T2[,1]),]
  
  # Get non-boundary biased regions
  PositiveNonbias <- data.frame(Region = PositiveRegions[ !(PositiveRegions %in% T1[,2]) & !(PositiveRegions %in% GappedT1[ GappedT1$Keep == 1,2])])
  NegativeNonbias <- data.frame(Region = NegativeRegions[ !(NegativeRegions %in% T2[,2]) & !(NegativeRegions %in% GappedT2[ GappedT2$Keep == 1,2])])
  
  # Stich together T1 regions
  TADs <- data.frame()
  for (i in 1:(nrow(T1)-1)) {
    TADs[i,1] <- T1[i,2]
    TADs[i,2] <- T1[(i+1),2]
  }
  
  # Remove TADs that doesnt cross a T2 boundary or multiple
  TADs$Keep <- 0
  TADs$T2 <- 0
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,2]
    Tmp <- data.frame(T2 = T2[ T2[,2] >= Min & T2[,2] <= Max,])
    if (nrow(Tmp) == 1) { 
      TADs[i,"Keep"] <- 1
      TADs[i,"T2"] <- Tmp[1,1]
    }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have negative bias between first T1 and T2 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,4]
    Tmp <- data.frame(NegativeNonbias = NegativeNonbias[ NegativeNonbias > Min & NegativeNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have positive bias between T2 and second T1 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,4]
    Max <- TADs[i,2]
    Tmp <- data.frame(PositiveNonbias = PositiveNonbias[ PositiveNonbias > Min & PositiveNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate TAD boundaries
  TADs$chr <- as.character(Chr)
  for (i in 1:nrow(TADs)) {
    Tmp <- Subset[ Subset$Region == TADs[i,1],]
    TADs[i,"start"] <- Tmp[ which.max(Tmp[,5]),3]
    Tmp <- Subset[ Subset$Region == (TADs[i,2]-1),]
    TADs[i,"end"] <- Tmp[ which.min(Tmp[,5]),3]+50000
  }
  
  # Filter TADs in low coverage areas (+/- 20 bins)
  Subset$Forward <- Subset[2,3] - Subset[1,3]
  for (nr in 2:nrow(Subset)) { Subset[nr,"Forward"] <- Subset[nr,3] - Subset[(nr-1),3] }
  
  for (m in 1:nrow(TADs)) {
    Start <- as.numeric(TADs[m,6])-50000
    End <- as.numeric(TADs[m,7])+50000
    Tmp <- Subset
    Tmp <- Tmp[ Tmp[,3] >= Start & Tmp[,3] < End,]
    if (max(Tmp$Forward) > 5000) { TADs[m,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate T1 boundaries
  T1$chr <- as.character(Chr)
  for (i in 1:nrow(T1)) {
    Tmp <- Subset[ Subset$Region == T1[i,2],]
    T1[i,"boundary"] <- min(Tmp[,3])
  }
  
  # Finalize TADs
  TADs <- TADs[,c(5,6,7)]
  
  # Combine all TADs
  All <- rbind(All, TADs)
  
  # Save T1 Boundaries
  T1 <- T1[,c(4,5)]
  Bounds <- rbind(Bounds, T1)
}

# Rename the TAD calls
ESC_TADs <- All
ESC_Bounds <- Bounds

## Call TADs on D2 data
# Import the data
Zero <- read.table("Data/Interactions/TADs/Cortex.DI", quote="\"", comment.char="", skip=1)

## Call TADs on sDI
# Convert sDI to z-scores
Mean <- mean(Zero$V4)
SD <- sd(Zero$V4)
Zero$V4 <- (Zero$V4 - Mean)/SD
rm(Mean)
rm(SD)

All <- data.frame()
Bounds <- data.frame()
Chromosomes <- c(paste("chr",seq(1,19,by=1),sep=""),"chrX")
for (Chr in Chromosomes) {
  # Subset to a single chromosome
  Subset <- Zero[ Zero$V1 == Chr,]
  
  # Identify transition points (between positive and negative bias)
  Subset$Transition <- 0
  for (i in 1:(nrow(Subset)-1)) {
    SignCurr <- sign(Subset[i,4])
    SignNext <- sign(Subset[(i+1),4])
    if (SignCurr != SignNext) { Subset[c(i),"Transition"] <- 1}
  }
  
  # Identify each region of bias
  Region <- 1
  Subset$Region <- 0
  for (i in 1:nrow(Subset)) {
    Subset[i,"Region"] <- Region
    if (Subset[i,"Transition"] == 1) { Region <- Region + 1}
  }
  
  # Calculate the length of each region
  Subset$Count <- 1
  Counts <- aggregate(Subset$Count, by=list(Subset$Region), FUN="sum")
  colnames(Counts) <- c("Region","Length")
  Subset <- merge(Subset, Counts, by="Region")
  
  # Identify regions longer or equal to 10 bins
  Subset$Long <- 0
  Subset[ Subset$Length >= 10,"Long"] <- 1
  
  # Mark each region for sufficient bias and set the sign
  Subset$Bias <- 0
  Subset$Sign <- 0
  for (i in unique(Subset$Region)) {
    Tmp <- Subset[ Subset$Region == i,]
    if (max(abs(Tmp$V4)) >= 0.5) {
      Subset[ Subset$Region == i, "Bias"] <- 1
      Subset[ Subset$Region == i, "Sign"] <- sign(max(Tmp$V4))
    }
  }
  
  # Remove regions that are too short (< 10 bins = 50kb)
  Subset[ Subset$Length <= 10, "Bias"] <- 0
  
  # Identify biased regions
  PositiveRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == 1, "Region"])
  NegativeRegions <- unique(Subset[ Subset$Bias == 1 & Subset$Sign == -1, "Region"])
  
  ## Identify type 1 boundaries (left > right)
  # For each negative region, get the closest positive region
  T1 <- data.frame()
  for (i in 1:length(NegativeRegions)) {
    if (length(PositiveRegions[ PositiveRegions >= NegativeRegions[i]]) > 0) {
      T1[i,1] <- NegativeRegions[i]
      T1[i,2] <- min(PositiveRegions[ PositiveRegions >= NegativeRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T1 <- T1[ order(T1[,2], -T1[,1]),]
  T1 <- T1[ duplicated(T1[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T1$Keep <- 0
  T1[ T1[,2] == (T1[,1] + 1),"Keep"] <- 1
  Proceeding <- T1[ T1$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT1 <- T1[ T1$Keep == 0,]
  for (i in 1:nrow(GappedT1)) {
    Test <- Subset[ Subset$Region < GappedT1[i,2] & Subset$Region > GappedT1[i,1],]
    if (nrow(Test) <= 8) { GappedT1[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT1[i,"Keep"] <- 0 }
    if (GappedT1[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT1[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT1[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT1[ GappedT1$Keep == 1,]) > 0) {
    GappedT1Keep <- GappedT1[ GappedT1$Keep == 1, c(4,5,3)]
    colnames(GappedT1Keep) <- colnames(Proceeding)
    T1 <- rbind(GappedT1Keep, Proceeding)
  }
  T1 <- T1[ order(T1[,1]),]
  
  # Identify type 2 boundaries (right > left)
  T2 <- data.frame()
  for (i in 1:length(PositiveRegions)) {
    if (length(NegativeRegions[ NegativeRegions >= PositiveRegions[i]]) > 0) {
      T2[i,1] <- PositiveRegions[i]
      T2[i,2] <- min(NegativeRegions[ NegativeRegions >= PositiveRegions[i]])  
    }
  }
  
  # Keep the negative that is closest to the positive border in case there are multiple
  T2 <- T2[ order(T2[,2], -T2[,1]),]
  T2 <- T2[ duplicated(T2[,2])==F,]
  
  # Keep the ones that are directly preceeding each other
  T2$Keep <- 0
  T2[ T2[,2] == (T2[,1] + 1),"Keep"] <- 1
  Proceeding <- T2[ T2$Keep == 1,]
  
  # Analyze the gap for gapped boundaries
  GappedT2 <- T2[ T2$Keep == 0,]
  for (i in 1:nrow(GappedT2)) {
    Test <- Subset[ Subset$Region < GappedT2[i,2] & Subset$Region > GappedT2[i,1],]
    if (nrow(Test) <= 8) { GappedT2[i,"Keep"] <- 1 }
    if (max(abs(Test[,5])) >= 0.25) { GappedT2[i,"Keep"] <- 0 }
    if (GappedT2[i,"Keep"] == 1) {
      Test <- Test[ order(-Test$V2),]
      GappedT2[i,4] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"] - 1
      GappedT2[i,5] <- Test[ which.min(abs(Test[,3] - mean(Test[,3]))),"Region"]
    }
  }
  if (nrow(GappedT2[ GappedT2$Keep == 1,]) > 0) {
    GappedT2Keep <- GappedT2[ GappedT2$Keep == 1, c(4,5,3)]
    colnames(GappedT2Keep) <- colnames(Proceeding)
    T2 <- rbind(GappedT2Keep, Proceeding)
  }
  T2 <- T2[ order(T2[,1]),]
  
  # Get non-boundary biased regions
  PositiveNonbias <- data.frame(Region = PositiveRegions[ !(PositiveRegions %in% T1[,2]) & !(PositiveRegions %in% GappedT1[ GappedT1$Keep == 1,2])])
  NegativeNonbias <- data.frame(Region = NegativeRegions[ !(NegativeRegions %in% T2[,2]) & !(NegativeRegions %in% GappedT2[ GappedT2$Keep == 1,2])])
  
  # Stich together T1 regions
  TADs <- data.frame()
  for (i in 1:(nrow(T1)-1)) {
    TADs[i,1] <- T1[i,2]
    TADs[i,2] <- T1[(i+1),2]
  }
  
  # Remove TADs that doesnt cross a T2 boundary or multiple
  TADs$Keep <- 0
  TADs$T2 <- 0
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,2]
    Tmp <- data.frame(T2 = T2[ T2[,2] >= Min & T2[,2] <= Max,])
    if (nrow(Tmp) == 1) { 
      TADs[i,"Keep"] <- 1
      TADs[i,"T2"] <- Tmp[1,1]
    }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have negative bias between first T1 and T2 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,1]
    Max <- TADs[i,4]
    Tmp <- data.frame(NegativeNonbias = NegativeNonbias[ NegativeNonbias > Min & NegativeNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Remove TADs that have positive bias between T2 and second T1 boundary
  for (i in 1:nrow(TADs)) {
    Min <- TADs[i,4]
    Max <- TADs[i,2]
    Tmp <- data.frame(PositiveNonbias = PositiveNonbias[ PositiveNonbias > Min & PositiveNonbias <= Max,])
    if (nrow(Tmp) > 0 ) { TADs[i,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate TAD boundaries
  TADs$chr <- as.character(Chr)
  for (i in 1:nrow(TADs)) {
    Tmp <- Subset[ Subset$Region == TADs[i,1],]
    TADs[i,"start"] <- Tmp[ which.max(Tmp[,5]),3]
    Tmp <- Subset[ Subset$Region == (TADs[i,2]-1),]
    TADs[i,"end"] <- Tmp[ which.min(Tmp[,5]),3]+50000
  }
  
  # Filter TADs in low coverage areas (+/- 20 bins)
  Subset$Forward <- Subset[2,3] - Subset[1,3]
  for (nr in 2:nrow(Subset)) { Subset[nr,"Forward"] <- Subset[nr,3] - Subset[(nr-1),3] }
  
  for (m in 1:nrow(TADs)) {
    Start <- as.numeric(TADs[m,6])-50000
    End <- as.numeric(TADs[m,7])+50000
    Tmp <- Subset
    Tmp <- Tmp[ Tmp[,3] >= Start & Tmp[,3] < End,]
    if (max(Tmp$Forward) > 5000) { TADs[m,"Keep"] <- 0 }
  }
  TADs <- TADs[ TADs$Keep == 1, ]
  
  # Annotate T1 boundaries
  T1$chr <- as.character(Chr)
  for (i in 1:nrow(T1)) {
    Tmp <- Subset[ Subset$Region == T1[i,2],]
    T1[i,"boundary"] <- min(Tmp[,3])
  }
  
  # Finalize TADs
  TADs <- TADs[,c(5,6,7)]
  
  # Combine all TADs
  All <- rbind(All, TADs)
  
  # Save T1 Boundaries
  T1 <- T1[,c(4,5)]
  Bounds <- rbind(Bounds, T1)
}

# Rename the TAD calls
Cortex_TADs <- All
Cortex_Bounds <- Bounds

## Clean up
rm(list=setdiff(ls(),c("Zero_TADs","Four_TADs","Two_TADs","ESC_TADs","Cortex_TADs","Zero_Bounds","Four_Bounds","Two_Bounds","ESC_Bounds","Cortex_Bounds")))

## Import data and convert to Z-scores
# D0
Zero <- read.table("C:/Users/jgsm/Desktop/D0.DI", quote="\"", comment.char="", skip=1)
Mean <- mean(Zero$V4)
SD <- sd(Zero$V4)
Zero$V4 <- (Zero$V4 - Mean)/SD
rm(Mean)
rm(SD)

# H4
Four <- read.table("C:/Users/jgsm/Desktop/H4.DI", quote="\"", comment.char="", skip=1)
Mean <- mean(Four$V4)
SD <- sd(Four$V4)
Four$V4 <- (Four$V4 - Mean)/SD
rm(Mean)
rm(SD)

# D2
Two <- read.table("C:/Users/jgsm/Desktop/D2.DI", quote="\"", comment.char="", skip=1)
Mean <- mean(Two$V4)
SD <- sd(Two$V4)
Two$V4 <- (Two$V4 - Mean)/SD
rm(Mean)
rm(SD)

# ESC
ESC <- read.table("C:/Users/jgsm/Desktop/ESC.DI", quote="\"", comment.char="", skip=1)
Mean <- mean(ESC$V4)
SD <- sd(ESC$V4)
ESC$V4 <- (ESC$V4 - Mean)/SD
rm(Mean)
rm(SD)

# Cortex
Cortex <- read.table("C:/Users/jgsm/Desktop/Cortex.DI", quote="\"", comment.char="", skip=1)
Mean <- mean(Cortex$V4)
SD <- sd(Cortex$V4)
Cortex$V4 <- (Cortex$V4 - Mean)/SD
rm(Mean)
rm(SD)

### Overlap boundaries and group by identification of T1 boundary
## Combine all boundaries
Bounds <- rbind(Zero_Bounds, Four_Bounds, Two_Bounds, ESC_Bounds, Cortex_Bounds)

## Filter boundaries within +/- 25 kb
# Setup
Bounds <- Bounds[ order(Bounds$chr, Bounds$boundary),]
Bounds$Region <- 0
Counter <- 1
Bounds$ID <- seq(1, nrow(Bounds), by = 1)

# Loop
for (i in 1:nrow(Bounds)) {
  Bounds[i,"Region"] <- Counter
  CurrChr <- Bounds[i,1]
  CurrBoundary <- Bounds[i,2]
  CurrID <- Bounds[i,4]
  Tmp <- Bounds[ Bounds$chr == CurrChr & abs(Bounds$boundary - CurrBoundary) <= 25000,]
  if (max(Tmp$ID) == CurrID) { Counter <- Counter + 1 }
}

# Keep center of each region
Bounds$Collaps <- paste(Bounds$chr, Bounds$boundary, Bounds$Region, sep="-")
Bounds <- Bounds[ duplicated(Bounds$Collaps)==F,]
Center <- aggregate(Bounds$boundary, by=list(Bounds$Region), FUN="mean")
colnames(Center) <- c("Region","boundary")
Bounds <- merge(Bounds, Center, by="Region")
Bounds <- Bounds[ duplicated(Bounds$Region)==F,]
Bounds <- Bounds[,c(2,6)]
colnames(Bounds) <- c("chr","boundary")

### Group by cell-type boundaries
## Combine 3T3-L1 T1s
T3_Bounds <- rbind(Zero_Bounds,Four_Bounds,Two_Bounds)

## Group IDs:
# Group 1 == 3T3-L1
# Group 2 == ESC 
# Group 3 == Cortex 
# Group 23 == ESC + Cortex
# Group 12 == 3T3-L1 + ESC
# Group 13 == 3T3-L1 + Cortex
# Group 123 == 3T3-L1 + ESC + Cortex
Bounds$Group <- 0

for (i in 1:nrow(Bounds)) {
  CurrChr <- Bounds[i,1]
  CurrBoundary <- Bounds[i,2]
  TestT3 <- T3_Bounds[ T3_Bounds$chr == CurrChr,]
  TestT3 <- TestT3[ abs(TestT3$boundary - CurrBoundary) <= 50000,]
  TestESC <- ESC_Bounds[ ESC_Bounds$chr == CurrChr,]
  TestESC <- TestESC[ abs(TestESC$boundary - CurrBoundary) <= 50000,]
  TestCortex <- Cortex_Bounds[ Cortex_Bounds$chr == CurrChr,]
  TestCortex <- TestCortex[ abs(TestCortex$boundary - CurrBoundary) <= 50000,]
  if (nrow(TestT3) > 0 & nrow(TestCortex) == 0 & nrow(TestESC) == 0) { Bounds[i,"Group"] <- 1 }
  if (nrow(TestT3) == 0 & nrow(TestCortex) == 0 & nrow(TestESC) > 0) { Bounds[i,"Group"] <- 2 }
  if (nrow(TestT3) == 0 & nrow(TestCortex) > 0 & nrow(TestESC) == 0) { Bounds[i,"Group"] <- 3 }
  if (nrow(TestT3) == 0 & nrow(TestCortex) > 0 & nrow(TestESC) > 0) { Bounds[i,"Group"] <- 23 }
  if (nrow(TestT3) > 0 & nrow(TestCortex) == 0 & nrow(TestESC) > 0) { Bounds[i,"Group"] <- 12 }
  if (nrow(TestT3) > 0 & nrow(TestCortex) > 0 & nrow(TestESC) == 0) { Bounds[i,"Group"] <- 13 }
  if (nrow(TestT3) > 0 & nrow(TestCortex) > 0 & nrow(TestESC) > 0) { Bounds[i,"Group"] <- 123 }
}

## Clean up
rm(list=setdiff(ls(),c("Zero_TADs","Four_TADs","Two_TADs","ESC_TADs","Cortex_TADs","Zero_Bounds","Four_Bounds","Two_Bounds","ESC_Bounds","Cortex_Bounds","Bounds","Zero","Four","Two","ESC","Cortex")))

# Get the numbers for the Venn diagram (Produced with Venn diagram plotter):
# A = 3T3-L1
nrow(Bounds[ Bounds$Group == 1 | Bounds$Group == 12 | Bounds$Group == 13 | Bounds$Group == 123,])

# B = ESC
nrow(Bounds[ Bounds$Group == 2 | Bounds$Group == 12 | Bounds$Group == 23 | Bounds$Group == 123,])

# C = Cortex
nrow(Bounds[ Bounds$Group == 3 | Bounds$Group == 13 | Bounds$Group == 23 | Bounds$Group == 123,])

# A/B
nrow(Bounds[ Bounds$Group == 12,])

# B/C
nrow(Bounds[ Bounds$Group == 23,])

# A/C
nrow(Bounds[ Bounds$Group == 13,])

# Overlaps all 3
nrow(Bounds[ Bounds$Group == 123,])

Back to start
Back to overview of Figure 1