-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path.Rhistory
512 lines (512 loc) · 31.3 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
present=length(which(is.na(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_MPP > foldchange),i])==FALSE));
total=length(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_MPP > foldchange),i]);
freq=c(freq,present/total);
}
DE <- rbind(DE, freq)
total_DE <- c(total_DE, total)
## down-regulated genes
freq<-NULL; for(i in 22:26) {
present=length(which(is.na(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_MPP < -foldchange),i])==FALSE));
total=length(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_MPP < -foldchange),i]);
freq=c(freq,present/total);
}
DE <- rbind(DE, freq)
total_DE <- c(total_DE, total)
## non-regulated genes
freq<-NULL; for(i in 22:26) {
present=length(which(is.na(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_MPP < foldchange & motif_expression_nopeaks$foldchange_MPP > -foldchange),i])==FALSE));
total=length(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_MPP < foldchange & motif_expression_nopeaks$foldchange_MPP > -foldchange),i]);
freq=c(freq,present/total);
}
nDE <- rbind(nDE, freq)
total_nDE <- c(total_nDE, total)
## compute p-value for motif frequency between up- and non- regulated genes
tmp<-NULL
for(i in 1:length(freq)) {
if(i==5) {
tmp[i] <- fisher.test(matrix(c(DE[3,i]*total_DE[3], total_DE[3], nDE[2,i]*total_nDE[2], total_nDE[2]), nrow=2), alternative="l")$p.value
} else {
tmp[i] <- fisher.test(matrix(c(DE[3,i]*total_DE[3], total_DE[3], nDE[2,i]*total_nDE[2], total_nDE[2]), nrow=2), alternative="g")$p.value
}
}
pVal <- rbind(pVal, tmp)
## compute p-value for motif frequency between down- and non- regulated genes
tmp<-NULL
for(i in 1:length(freq)) {
if(i==5) {
tmp[i] <- fisher.test(matrix(c(DE[4,i]*total_DE[4], total_DE[4], nDE[2,i]*total_nDE[2], total_nDE[2]), nrow=2), alternative="l")$p.value
} else {
tmp[i] <- fisher.test(matrix(c(DE[4,i]*total_DE[4], total_DE[4], nDE[2,i]*total_nDE[2], total_nDE[2]), nrow=2), alternative="g")$p.value
}
}
pVal <- rbind(pVal, tmp)
## HSC
## up-regulated genes
freq<-NULL; for(i in 22:26) {
present=length(which(is.na(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_HSC > foldchange),i])==FALSE));
total=length(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_HSC > foldchange),i]);
freq=c(freq,present/total);
}
DE <- rbind(DE, freq)
total_DE <- c(total_DE, total)
## down-regulated genes
freq<-NULL; for(i in 22:26) {
present=length(which(is.na(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_HSC < -foldchange),i])==FALSE));
total=length(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_HSC < -foldchange),i]);
freq=c(freq,present/total);
}
DE <- rbind(DE, freq)
total_DE <- c(total_DE, total)
## non-regulated genes
freq<-NULL; for(i in 22:26) {
present=length(which(is.na(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_HSC < foldchange & motif_expression_nopeaks$foldchange_HSC > -foldchange),i])==FALSE));
total=length(motif_expression_nopeaks[which(motif_expression_nopeaks$foldchange_HSC < foldchange & motif_expression_nopeaks$foldchange_HSC > -foldchange),i]);
freq=c(freq,present/total);
}
nDE <- rbind(nDE, freq)
total_nDE <- c(total_nDE, total)
## compute p-value for motif frequency between up- and non- regulated genes
tmp<-NULL
for(i in 1:length(freq)) {
if(i==5) {
tmp[i] <- fisher.test(matrix(c(DE[5,i]*total_DE[5], total_DE[5], nDE[3,i]*total_nDE[3], total_nDE[3]), nrow=2), alternative="l")$p.value
} else {
tmp[i] <- fisher.test(matrix(c(DE[5,i]*total_DE[5], total_DE[5], nDE[3,i]*total_nDE[3], total_nDE[3]), nrow=2), alternative="g")$p.value
}
}
pVal <- rbind(pVal, tmp)
## compute p-value for motif frequency between down- and non- regulated genes
tmp<-NULL
for(i in 1:length(freq)) {
if(i==5) {
tmp[i] <- fisher.test(matrix(c(DE[6,i]*total_DE[6], total_DE[6], nDE[3,i]*total_nDE[3], total_nDE[3]), nrow=2), alternative="l")$p.value
} else {
tmp[i] <- fisher.test(matrix(c(DE[6,i]*total_DE[6], total_DE[6], nDE[3,i]*total_nDE[3], total_nDE[3]), nrow=2), alternative="g")$p.value
}
}
pVal <- rbind(pVal, tmp)
par(mfrow=c(3,2), mar=c(4,4,4,2))
#motifs <- c("Sp1.zf", "NFY", "Elk1.ETS", "PB0171.1_Sox18_2", "MA0108.2_TBP")
motifs <- c("CEBP", "PB0058.1_Sfpi1", "RUNX_Runt", "MA0477.1_FOSL1", "MA0599.1_KLF5")
col <- c("#00A600FF", "#00A600FF", "#E6E600FF", "#E6E600FF", "#EAB64EFF", "#EAB64EFF", "#EEB99FFF", "#EEB99FFF", "#F2F2F2FF", "#F2F2F2FF")
barX <- barplot(rbind(DE[1,],nDE[1,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between up- (%d) and non-regulated (%d) genes (preGM)", total_DE[1], total_nDE[1]), ylab="density")
text(x=barX[1,], y=DE[1,]+0.05, label=round(pVal[1,], digits=2))
par(mfrow=c(3,2), mar=c(8,4,4,2))
#motifs <- c("Sp1.zf", "NFY", "Elk1.ETS", "PB0171.1_Sox18_2", "MA0108.2_TBP")
motifs <- c("CEBP", "PB0058.1_Sfpi1", "RUNX_Runt", "MA0477.1_FOSL1", "MA0599.1_KLF5")
col <- c("#00A600FF", "#00A600FF", "#E6E600FF", "#E6E600FF", "#EAB64EFF", "#EAB64EFF", "#EEB99FFF", "#EEB99FFF", "#F2F2F2FF", "#F2F2F2FF")
barX <- barplot(rbind(DE[1,],nDE[1,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between up- (%d) and non-regulated (%d) genes (preGM)", total_DE[1], total_nDE[1]), ylab="density")
text(x=barX[1,], y=DE[1,]+0.05, label=round(pVal[1,], digits=2))
barX <- barplot(rbind(DE[2,],nDE[1,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between down- (%d) and non-regulated (%d) genes (preGM)", total_DE[2], total_nDE[1]), ylab="density")
text(x=barX[1,], y=DE[2,]+0.05, label=round(pVal[2,], digits=2))
barX <- barplot(rbind(DE[3,], nDE[2,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between up- (%d) and non-regulated (%d) genes (MPP)", total_DE[3], total_nDE[2]), ylab="density")
text(x=barX[1,], y=DE[3,]+0.05, label=round(pVal[3,], digits=2))
barX <- barplot(rbind(DE[4,], nDE[2,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between down- (%d) and non-regulated (%d) genes (MPP)", total_DE[4], total_nDE[2]), ylab="density")
text(x=barX[1,], y=DE[4,]+0.05, label=round(pVal[4,], digits=2))
barX <- barplot(rbind(DE[5,], nDE[3,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between up- (%d) and non-regulated (%d) genes (HSC)", total_DE[5], total_nDE[3]), ylab="density")
text(x=barX[1,], y=DE[5,]+0.05, label=round(pVal[5,], digits=2))
barX <- barplot(rbind(DE[6,], nDE[3,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between down- (%d) and non-regulated (%d) genes (HSC)", total_DE[6], total_nDE[3]), ylab="density")
text(x=barX[1,], y=DE[6,]+0.05, label=round(pVal[6,], digits=2))
dev.off()
pdf("motif_analysis_cebpa_expression_barplot_nopeaks.pdf", width=12)
par(mfrow=c(3,2), mar=c(8,4,4,2))
#motifs <- c("Sp1.zf", "NFY", "Elk1.ETS", "PB0171.1_Sox18_2", "MA0108.2_TBP")
motifs <- c("CEBP", "PB0058.1_Sfpi1", "RUNX_Runt", "MA0477.1_FOSL1", "MA0599.1_KLF5")
col <- c("#00A600FF", "#00A600FF", "#E6E600FF", "#E6E600FF", "#EAB64EFF", "#EAB64EFF", "#EEB99FFF", "#EEB99FFF", "#F2F2F2FF", "#F2F2F2FF")
barX <- barplot(rbind(DE[1,],nDE[1,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between up- (%d) and non-regulated (%d) genes (preGM)", total_DE[1], total_nDE[1]), ylab="density")
text(x=barX[1,], y=DE[1,]+0.05, label=round(pVal[1,], digits=2))
barX <- barplot(rbind(DE[2,],nDE[1,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between down- (%d) and non-regulated (%d) genes (preGM)", total_DE[2], total_nDE[1]), ylab="density")
text(x=barX[1,], y=DE[2,]+0.05, label=round(pVal[2,], digits=2))
barX <- barplot(rbind(DE[3,], nDE[2,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between up- (%d) and non-regulated (%d) genes (MPP)", total_DE[3], total_nDE[2]), ylab="density")
text(x=barX[1,], y=DE[3,]+0.05, label=round(pVal[3,], digits=2))
barX <- barplot(rbind(DE[4,], nDE[2,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between down- (%d) and non-regulated (%d) genes (MPP)", total_DE[4], total_nDE[2]), ylab="density")
text(x=barX[1,], y=DE[4,]+0.05, label=round(pVal[4,], digits=2))
barX <- barplot(rbind(DE[5,], nDE[3,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between up- (%d) and non-regulated (%d) genes (HSC)", total_DE[5], total_nDE[3]), ylab="density")
text(x=barX[1,], y=DE[5,]+0.05, label=round(pVal[5,], digits=2))
barX <- barplot(rbind(DE[6,], nDE[3,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between down- (%d) and non-regulated (%d) genes (HSC)", total_DE[6], total_nDE[3]), ylab="density")
text(x=barX[1,], y=DE[6,]+0.05, label=round(pVal[6,], digits=2))
dev.off()
View(motif_expression_nopeaks)
colnames(motif_expression_nopeaks)
ls
barX <- barplot(rbind(DE[6,], nDE[3,]), beside=1, ylim=c(0,0.8), names.arg=motifs, las=2, col=col, main=sprintf("motif density between down- (%d) and non-regulated (%d) genes (HSC)", total_DE[6], total_nDE[3]), ylab="density")
q()
setwd("~/Oyster/chip-seq-analysis/analysis/motif_analysis")
motif <- read.delim("cebpa.peaks.motif.all", sep="\t")
peaks <- read.table("../cebpa_peaks.txt", header=T)
motif_peaks <- merge(motif, peaks, by.x="PeakID..cmd.annotatePeaks.pl....cebpa_summits_window.bed.mm9..m.cebpa_motif_homer.homerResults.motif1.motif.cebpa_motif_homer.homerResults.motif2.motif.cebpa_motif_homer.homerResults.motif3.motif.cebpa_motif_homer.homerResults.motif4.motif.cebpa_motif_homer.homerResults.motif5.motif.", by.y="cebpa_id")
expression <- read.table("../gmp_pregm_expression.txt", header=T)
motif_peaks_expression <- merge(motif_peaks, expression, by.x="closest_gene", by.y="symbols")
motif_peaks_expression <- motif_peaks_expression[with(motif_peaks_expression, order(motif_peaks_expression[,1], motif_peaks_expression[,34])),]
motif_peaks_expression$foldchange_GMP <- log((exp(motif_peaks_expression$GMP*log(2)))/(exp(motif_peaks_expression$preGM*log(2))))
motif_peaks_expression[motif_peaks_expression==""] <- NA
motif_peaks_expression_lt500 <- motif_peaks_expression[(which(motif_peaks_expression$dist_closest_gene<3000)),]
dim(motif_peaks_expression)
dim(motif_peaks_expression_lt500)
motif_peaks_expression[motif_peaks_expression==""] <- NA
motif_peaks_expression_lt500 <- motif_peaks_expression[(which(motif_peaks_expression$dist_closest_gene<3000)),]
View(motif_peaks_expression_lt500)
write.table(motif_peaks_expression_lt500[which(motif_peaks_expression_lt500$foldchange_GMP > 0.69),c("V3", "V4", "V5", "V2", "V34", "V6")], "../cebpa_peaks_upregulated.bed", sep="\t", quote=F, row.names=F, col.names=F)
motif_peaks_expression_lt500[which(motif_peaks_expression_lt500$foldchange_GMP > 0.69),c("V3", "V4", "V5", "V2", "V34", "V6")]
motif_peaks_expression_lt500[which(motif_peaks_expression_lt500$foldchange_GMP > 0.69),]
motif_peaks_expression_lt500[which(motif_peaks_expression_lt500$foldchange_GMP > 0.69),c("V3", "V4", "V5", "V2", "V34", "V6")]
write.table(motif_peaks_expression_lt500[which(motif_peaks_expression_lt500$foldchange_GMP > 0.69),c("Chr", "Start", "End", "closest_gene", "dist_closest_PU1_peak", "Strand")], "../cebpa_peaks_upregulated.bed", sep="\t", quote=F, row.names=F, col.names=F)
write.table(motif_peaks_expression_lt500[which(motif_peaks_expression_lt500$foldchange_GMP < -0.69),c("Chr", "Start", "End", "closest_gene", "dist_closest_PU1_peak", "Strand")], "../cebpa_peaks_downregulated.bed", sep="\t", quote=F, row.names=F, col.names=F)
write.table(motif_peaks_expression_lt500[which(motif_peaks_expression_lt500$foldchange_GMP > -0.10 & motif_peaks_expression_lt500$foldchange_GMP < 0.10),c("Chr", "Start", "End", "closest_gene", "dist_closest_PU1_peak", "Strand")], "../cebpa_peaks_notregulated.bed", sep="\t", quote=F, row.names=F, col.names=F)
setwd("~/Oyster/chip-seq-analysis/analysis/motif_analysis")
source("http://bioconductor.org/biocLite.R")
biocLite("GenomicRanges")
source("http://bioconductor.org/biocLite.R")
biocLite("GenomicRanges")
biocLite("BSgenome.Mmusculus.UCSC.mm9")
defaults write org.R-project.R force.LANG en_US.UTF-8
biocLite("BSgenome.Mmusculus.UCSC.mm9")
source("http://bioconductor.org/biocLite.R")
biocLite("GenomicRanges")
biocLite("BSgenome.Mmusculus.UCSC.mm9")
library('GenomicRanges')
require("BSgenome.Mmusculus.UCSC.mm9", character.only=T)
library('GenomicRanges')
coor <- read.delim(file="../cebpa_peaks_upregulated.bed")
mySeq <- GRanges(
seqnames=coor$V1,
ranges=IRanges(start=coor$V2, end=coor$V3),
strand=coor$V6)
head(coor)
coor <- read.delim(file="../cebpa_peaks_upregulated.bed", header=F)
mySeq <- GRanges(
seqnames=coor$V1,
ranges=IRanges(start=coor$V2, end=coor$V3),
strand=coor$V6)
require("BSgenome.Mmusculus.UCSC.mm9",character.only = TRUE)
seq <- getSeq(Mmusculus, mySeq)
length(seq)
as.character(seq)
seq
as.character(seq)
write.table(as.character(seq), "../cebpa_peaks_upregulated.seq", sep="\t", quote=F, row.names=F, col.names=F)
coor <- read.delim(file="../cebpa_peaks_downregulated.bed", header=F)
mySeq <- GRanges(
seqnames=coor$V1,
ranges=IRanges(start=coor$V2, end=coor$V3),
strand=coor$V6)
require("BSgenome.Mmusculus.UCSC.mm9",character.only = TRUE)
seq <- getSeq(Mmusculus, mySeq)
length(seq)
write.table(as.character(seq), "../cebpa_peaks_downregulated.seq", sep="\t", quote=F, row.names=F, col.names=F)
coor <- read.delim(file="../cebpa_peaks_notregulated.bed", header=F)
mySeq <- GRanges(
seqnames=coor$V1,
ranges=IRanges(start=coor$V2, end=coor$V3),
strand=coor$V6)
require("BSgenome.Mmusculus.UCSC.mm9",character.only = TRUE)
seq <- getSeq(Mmusculus, mySeq)
write.table(as.character(seq), "../cebpa_peaks_notregulated.seq", sep="\t", quote=F, row.names=F, col.names=F)
load("~/Lobster/project/rna-seq-analysis/result_CFUE/output/5_spliceR_analysis/03_mySpliceRList_analyzed_high_confidence.Rdata")
library("spliceR")
install.packages("spliceR")
source("http://bioconductor.org/biocLite.R")
biocLite("spliceR")
load("~/Lobster/project/rna-seq-analysis/result_CFUE/output/5_spliceR_analysis/03_mySpliceRList_analyzed_high_confidence.Rdata")
session.info()
sessionInfo()
mySpliceRList_hc
head(mySpliceRList_hc)
col.names(mySpliceRList_hc)
colnames(mySpliceRList_hc)
head(mySpliceRList_hc, 4)
head(mySpliceRList_hc$transcript_features)
colnames(mySpliceRList_hc$transcript_features)
head(mySpliceRList_hc$transcript_features)
head(mySpliceRList_hc$transcript_features[which(mySpliceRList_hc$transcript_features$spliceR.iso_significant=="yes"),])
head(mySpliceRList_hc$transcript_features[which(mySpliceRList_hc$transcript_features$spliceR.iso_significant=="no"),])
head(mySpliceRList_hc$transcript_features[which(mySpliceRList_hc$transcript_features$spliceR.iso_significant=="yes"),])
head(mySpliceRList_hc$transcript_features)
head(mySpliceRList_hc$exon_features)
head(mySpliceRList_hc$exon_features, 10)
conditions(mySpliceRList_hc)
spliceRPlot(mySpliceRList_hc, evaluate="nr_transcript_pr_gene")
load("~/Lobster/project/rna-seq-analysis/matilda/result_CFUE/output/5_spliceR_analysis/03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc$filter_params
mySpliceRList_hc$transcript_features
head(mySpliceRList_hc$transcript_features, 10)
hist(mySpliceRList_hc$transcript_features$spliceR.gene_p_value)
which(mySpliceRList_hc$transcript_features$spliceR.gene_p_value<0.01)
length(which(mySpliceRList_hc$transcript_features$spliceR.gene_p_value<0.01))
length(which(mySpliceRList_hc$transcript_features$spliceR.gene_p_value>=0.01))
head(mySpliceRList_hc$transcript_features, 10)
length(which(mySpliceRList_hc$transcript_features$spliceR.gene_q_value>=0.01))
length(which(mySpliceRList_hc$transcript_features$spliceR.gene_q_value<0.01))
unique(mySpliceRList_hc$transcript_features$spliceR.gene_significant)
which(mySpliceRList_hc$transcript_features$spliceR.gene_significant=="yes")
length(which(mySpliceRList_hc$transcript_features$spliceR.gene_significant=="yes"))
which(mySpliceRList_hc$transcript_features$spliceR.iso_significant=="yes")
which(mySpliceRList_hc$transcript_features$spliceR.iso_significant=="no")
which(mySpliceRList_hc$transcript_features$spliceR.iso_significant=="yes")
unique(mySpliceRList_hc$transcript_features$spliceR.PTC)
which(mySpliceRList_hc$transcript_features$spliceR.PTC=="TRUE")
length(which(mySpliceRList_hc$transcript_features$spliceR.PTC=="TRUE"))
head(mySpliceRList_hc$transcript_features, 10)
q()
t <- read.table("~/Downloads/hm1.txt")
View(t)
t <- read.table("~/Downloads/hm1.txt", header=T)
View(t)
colSums(t[,5])
head(t[,5])
sum(t[,5])
colSums(df[t[,5]])
sum(t[,5], na.rm=TRUE)
sum(t[,c(5,6)], na.rm=TRUE)
sum(df[t[,c(5,6)]], na.rm=TRUE)
sum(t[c(5,6)], na.rm=TRUE)
colSums(t[,c(3,4)])
colSums(t[,c(5,6)])
colSums(t[,c(5:96)])
colnames(t)
colSums(t[,c(5:105)])
colSums(t[,c(5:106)])
colSums(t[,c(5:105)])
dim(t)
colSums(t[,c(5:105)])/4322
barplot(colSums(t[,c(5:105)])/4322)
barplot(t[1:c(5:105)])
barplot(t[1,c(5:105)])
t[1,c(5:105)]
barplot(t[1,c(5:105)])
barplot(as.vector(t[1,c(5:105)]))
t[1,c(5:105)]
t1 <- t[1,c(5:105)]
t1
as.vector(t[1,c(5:105)])
plot(as.vector(t[1,c(5:105)]))
as.vector(t[1,c(5:105)])
unlist(t[1,c(5:105)])
as.vector(unlist(t[1,c(5:105)]))
barplot(as.vector(unlist(t[1,c(5:105)])))
barplot(as.vector(unlist(t[2,c(5:105)])))
barplot(as.vector(unlist(t[,c(5:105)])))
barplot(as.vector(unlist(t[6,c(5:105)])))
barplot(as.vector(unlist(t[10,c(5:105)])))
barplot(as.vector(unlist(t[20,c(5:105)])))
barplot(as.vector(unlist(t[6,c(16:105)])))
barplot(as.vector(unlist(t[6,c(30:105)])))
barplot(as.vector(unlist(t[6,c(90:105)])))
barplot(as.vector(unlist(t[16,c(5:105)])))
barplot(as.vector(unlist(t[20,c(5:105)])))
barplot(as.vector(unlist(t[200,c(5:105)])))
rm(t1)
barplot(colSums(t[,c(5:105)])/4322)
barplot(colSums(t[,c(5:40)])/4322)
barplot(colSums(t[,c(41:60)])/4322)
barplot(colSums(t[,c(61:105)])/4322)
barplot(colSums(t[,c(61:105)]))
barplot(colSums(t[,c(61:105)])/4322)
rowSums(t[1,5:105])
rowSums(t[,5:105])
barplo(rowSums(t[,5:105]))
barplot(rowSums(t[,5:105]))
rowSums(t[1,5:45])
c(rowSums(t[1,5:45]), rowSums(t[1,46:65]), rowSums(t[1,65:105]))
c(rowSums(t[1,5:54]), rowSums(t[1,55:60]), rowSums(t[1,61:105]))
c(rowSums(t[1,5:54]), rowSums(t[1,55:56]), rowSums(t[1,57:105]))
barplot(c(rowSums(t[1,5:54]), rowSums(t[1,55:56]), rowSums(t[1,57:105])))
row=1
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=2
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=3
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=4
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=5
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=6
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=7
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=8
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=9
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=10
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=11
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=12
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=13
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=16
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=145
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=160
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=190
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=268
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=454
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=545
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=5676
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=567
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
row=978
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
barplot(c(rowSums(t[row,5:54])/50, rowSums(t[row,55:56])/2, rowSums(t[row,57:105])/50))
row=567
barplot(c(rowSums(t[row,5:54])/50, rowSums(t[row,55:56])/2, rowSums(t[row,57:105])/50))
row=5
barplot(c(rowSums(t[row,5:54])/50, rowSums(t[row,55:56])/2, rowSums(t[row,57:105])/50))
row=1
barplot(c(rowSums(t[row,5:54])/50, rowSums(t[row,55:56])/2, rowSums(t[row,57:105])/50))
barplot(c(rowSums(t[row,5:54]), rowSums(t[row,55:56]), rowSums(t[row,57:105])))
setwd("/Users/sachinpundhir/Lobster/project/rna-seq-analysis/matilda/result_CFUE_2Rep/output/5_spliceR_analysis")
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_cfue <- as.data.frame(mySpliceRList_hc$transcript_features)
down_gene_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.gene_significant=="yes" & mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < 0),]
View(down_gene_cfue)
unique(down_gene_cfue$spliceR.gene_short_name)
length(unique(down_gene_cfue$spliceR.gene_short_name))
length(unique(mySpliceRList_hc_cfue$spliceR.gene_short_name))
length(unique(mySpliceRList_hc_cfue$spliceR.gene_q_value<0.01))
length(which(mySpliceRList_hc_cfue$spliceR.gene_q_value<0.01))
length(which(mySpliceRList_hc_cfue$spliceR.gene_q_value<0.01))
length(which(mySpliceRList_hc_cfue$spliceR.gene_significant=="yes"))
length(which(mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change<-2))
length(which(mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < -2))
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_cfue <- as.data.frame(mySpliceRList_hc$transcript_features)
down_gene_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.gene_significant=="yes" & mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < 0),]
length(which(mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < -2))
length(which(mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < -1))
length(which(mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < -1.5))
length(which(mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < -1.7))
length((down_gene_cfue[which(down_gene_cfue$spliceR.PTC=="TRUE"),]$spliceR.gene_id))
down_gene_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.gene_significant=="yes" & mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < 0),]
down_iso_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.iso_significant=="yes" & mySpliceRList_hc_cfue$spliceR.iso_log2_fold_change < 0),]
rm(down_iso_cfue)
ptc_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.PTC=="TRUE"),]
setwd("/Users/sachinpundhir/Lobster/project/rna-seq-analysis/matilda/result_PreMegE_2Rep/output/5_spliceR_analysis")
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_premege <- as.data.frame(mySpliceRList_hc$transcript_features)
# down regulated genes
down_gene_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.gene_significant=="yes" & mySpliceRList_hc_premege$spliceR.gene_log2_fold_change < 0),]
# down regulated isoforms
down_iso_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.iso_significant=="yes" & mySpliceRList_hc_premege$spliceR.iso_log2_fold_change < 0),]
# PTC in transcripts
ptc_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.PTC=="TRUE"),]
rm(down_iso_premege)
head(down_gene_cfue)
head(down_gene_cfue$spliceR.gene_id)
head(down_gene_cfue$spliceR.gene_short_name)
intersect(down_gene_cfue$spliceR.gene_short_name, down_gene_premege$spliceR.gene_short_name)
intersect(down_gene_cfue$spliceR.gene_id, down_gene_premege$spliceR.gene_id)
intersect(down_gene_cfue$spliceR.gene_short_name, down_gene_premege$spliceR.gene_short_name)
write.table(down_gene_cfue, file="down_gene_cfue.xls", sep="\t", quote=F, row.names=F)
getwd()
setwd("/Users/sachinpundhir/Lobster/project/rna-seq-analysis/matilda/result_CFUE_2Rep/output/5_spliceR_analysis")
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_cfue <- as.data.frame(mySpliceRList_hc$transcript_features)
# down regulated genes
down_gene_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.gene_significant=="yes" & mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < 0),]
length((down_gene_cfue[which(down_gene_cfue$spliceR.PTC=="TRUE"),]$spliceR.gene_id))
# down regulated isoforms
write.table(down_gene_cfue, file="down_gene_cfue.xls", sep="\t", quote=F, row.names=F)
write.table(ptc_cfue, file="ptc_cfue.xls", sep="\t", quote=F, row.names=F)
setwd("/Users/sachinpundhir/Lobster/project/rna-seq-analysis/matilda/result_PreMegE_2Rep/output/5_spliceR_analysis")
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_premege <- as.data.frame(mySpliceRList_hc$transcript_features)
down_gene_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.gene_significant=="yes" & mySpliceRList_hc_premege$spliceR.gene_log2_fold_change < 0),]
down_iso_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.iso_significant=="yes" & mySpliceRList_hc_premege$spliceR.iso_log2_fold_change < 0),]
write.table(down_gene_premege, file="down_gene_premege.xls", sep="\t", quote=F, row.names=F)
ptc_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.PTC=="TRUE"),]
write.table(ptc_premege, file="ptc_premege.xls", sep="\t", quote=F, row.names=F)
down_gene_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.gene_significant=="yes" & mySpliceRList_hc_premege$spliceR.gene_log2_fold_change < 0),]
setwd("/Users/sachinpundhir/Lobster/project/rna-seq-analysis/matilda/result_CFUE_2Rep/output/5_spliceR_analysis")
write.table(mySpliceRList_hc_cfue, file="mySpliceRList_hc_cfue", sep="\t", quote=F, row.names=F)
setwd("/Users/sachinpundhir/Lobster/project/rna-seq-analysis/matilda/result_PreMegE_2Rep/output/5_spliceR_analysis")
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_premege <- as.data.frame(mySpliceRList_hc$transcript_features)
write.table(mySpliceRList_hc_premege, file="mySpliceRList_hc_premege", sep="\t", quote=F, row.names=F)
setwd("/Users/sachinpundhir/Lobster/project/rna-seq-analysis/matilda/result_CFUE_2Rep/output/5_spliceR_analysis")
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_cfue <- as.data.frame(mySpliceRList_hc$transcript_features)
write.table(mySpliceRList_hc_cfue, file="mySpliceRList_hc_cfue", sep="\t", quote=F, row.names=F)
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_cfue <- as.data.frame(mySpliceRList_hc$transcript_features)
write.table(mySpliceRList_hc_cfue, file="mySpliceRList_hc_cfue", sep="\t", quote=F, row.names=F)
# down regulated genes
down_gene_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.gene_significant=="yes" & mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < 0),]
write.table(down_gene_cfue, file="down_gene_cfue.xls", sep="\t", quote=F, row.names=F)
ptc_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.PTC=="TRUE"),]
write.table(ptc_cfue, file="ptc_cfue.xls", sep="\t", quote=F, row.names=F)
View(down_gene_cfue)
View(ptc_cfue)
colnames(mySpliceRList_hc_cfue)
View(mySpliceRList_hc_cfue)
head(mySpliceRList_hc_cfue[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)])
round(mySpliceRList_hc_cfue[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)], digits=2)
dim(round(mySpliceRList_hc_cfue[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)], digits=2))
head(round(mySpliceRList_hc_cfue[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)], digits=2))
head(round(mySpliceRList_hc_cfue[,c(-17)], digits=2))
ID = c("a","b","c","d","e")
Value1 = as.numeric(c("3.4","6.4","8.7","1.1","0.1"))
Value2 = as.numeric(c("8.2","1.7","6.4","1.9","10.3"))
df<-data.frame(ID,Value1,Value2, stringsAsFactors = FALSE)
df[,-1] <-round(df[,-1],0)
df
mySpliceRList_hc_cfue[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)] <- round(mySpliceRList_hc_cfue[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)], digits=2)
head(mySpliceRList_hc_cfue)
mySpliceRList_hc_cfue[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)] <- round(mySpliceRList_hc_cfue[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)], digits=2)
View(mySpliceRList_hc_cfue)
write.table(mySpliceRList_hc_cfue, file="mySpliceRList_hc_cfue", sep="\t", quote=F, row.names=F)
# down regulated genes
down_gene_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.gene_significant=="yes" & mySpliceRList_hc_cfue$spliceR.gene_log2_fold_change < 0),]
write.table(down_gene_cfue, file="down_gene_cfue.xls", sep="\t", quote=F, row.names=F)
# down regulated isoforms
down_iso_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.iso_significant=="yes" & mySpliceRList_hc_cfue$spliceR.iso_log2_fold_change < 0),]
# PTC in transcripts
ptc_cfue <- mySpliceRList_hc_cfue[which(mySpliceRList_hc_cfue$spliceR.PTC=="TRUE"),]
write.table(ptc_cfue, file="ptc_cfue.xls", sep="\t", quote=F, row.names=F)
mySpliceRList_hc_premege[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)] <- round(mySpliceRList_hc_premege[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)], digits=2)
setwd("/Users/sachinpundhir/Lobster/project/rna-seq-analysis/matilda/result_PreMegE_2Rep/output/5_spliceR_analysis")
load("03_mySpliceRList_analyzed_high_confidence.Rdata")
mySpliceRList_hc_premege <- as.data.frame(mySpliceRList_hc$transcript_features)
mySpliceRList_hc_premege[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)] <- round(mySpliceRList_hc_premege[,c(17,18,19,20,21,24,25,26,27,28,40,41,42)], digits=2)
write.table(mySpliceRList_hc_premege, file="mySpliceRList_hc_premege", sep="\t", quote=F, row.names=F)
# down regulated genes
down_gene_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.gene_significant=="yes" & mySpliceRList_hc_premege$spliceR.gene_log2_fold_change < 0),]
write.table(down_gene_premege, file="down_gene_premege.xls", sep="\t", quote=F, row.names=F)
# down regulated isoforms
down_iso_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.iso_significant=="yes" & mySpliceRList_hc_premege$spliceR.iso_log2_fold_change < 0),]
# PTC in transcripts
ptc_premege <- mySpliceRList_hc_premege[which(mySpliceRList_hc_premege$spliceR.PTC=="TRUE"),]
write.table(ptc_premege, file="ptc_premege.xls", sep="\t", quote=F, row.names=F)
setwd("~/Lobster/software/idrCode")
peakfile1 <- "~/Lobster/project/chip-seq-analysis/analysis/peak_calling_histone/h3k4me1/macs/h3k4me1_pregm_cebpa_wt_Rep1.pr1_Vs_control_Rep0.regionPeak"
peakfile2 <- "~/Lobster/project/chip-seq-analysis/analysis/peak_calling_histone/h3k4me1/macs/h3k4me1_pregm_cebpa_wt_Rep1.pr2_Vs_control_Rep0.regionPeak"
half.width=NULL
output.prefix <- "~/Lobster/project/chip-seq-analysis/analysis/peak_calling_histone/h3k4me1/consistency/selfPseudoReps/h3k4me1_pregm_cebpa_wt_Rep1.pr1_VS_h3k4me1_pregm_cebpa_wt_Rep1.pr2"
overlap.ratio <- 0
is.broadpeak <- F
sig.value <- p.value
sig.value <- "p.value"
source("functions-all-clayton-12-13.r")
chr.file <- "genome_table.txt"
chr.size <- read.table(chr.file)
sink(paste(output.prefix, "-Rout.txt", sep=""))
cat("is.broadpeak", is.broadpeak, "\n")
rep1 <- process.narrowpeak(paste(peakfile1, sep=""), chr.size, half.width=half.width, summit="offset", broadpeak=is.broadpeak)
rep2 <- process.narrowpeak(paste(peakfile2, sep=""), chr.size, half.width=half.width, summit="offset", broadpeak=is.broadpeak)
cat("is.broadpeak", is.broadpeak, "\n")
uri.output <- compute.pair.uri(rep1$data.cleaned, rep2$data.cleaned, sig.value1=sig.value, sig.value2=sig.value, overlap.ratio=overlap.ratio)
ls
ls
ls()
unlink()
q()