-
Notifications
You must be signed in to change notification settings - Fork 0
/
wes-batch1-data-assemble.R
60 lines (49 loc) · 3.32 KB
/
wes-batch1-data-assemble.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
######################################
## WES TEMPO results 11/26/19 Figure-4
######################################
# Load the per-sample somatic summary table and derive per-sample annotations.
wessamplelevel = fread('/Users/chavans/juno/work/ccs/ccs_wes/Proj_07871_DFLOQ/Result/somatic/sample_data.txt')
wessamplelevel = wessamplelevel %>%
  mutate(
    # First "__"-delimited piece of `sample`
    # (presumably `sample` is a tumor__normal pair ID -- TODO confirm).
    Tumor_Sample_Barcode = str_split(sample, "__", simplify = TRUE)[, 1],
    # grepl() already yields a logical, so no comparison against "TRUE" is needed.
    SAMPLE_TYPE = ifelse(grepl("M0", sample), "Metastasis", "Primary"),
    # First 10 characters of the barcode, used below as the per-patient key.
    PTN = substr(Tumor_Sample_Barcode, 1, 10)
  )
dim(wessamplelevel)
head(wessamplelevel) # sample

# Samples for which no purity value is available.
NAsamples = wessamplelevel %>%
  filter(is.na(purity)) %>%
  select(Tumor_Sample_Barcode)
NAsamples$Tumor_Sample_Barcode
# Recorded output from a previous run:
# [1] "s_C_271D5P_M001_d" "s_C_271D5P_P001_d" "s_C_ADEV70_P001_d"
# [4] "s_C_PDMVDR_P001_d" "s_C_W384MJ_M001_d"

# Missing purity is recoded to -1 so that those samples fall on the
# low-purity side of the 0.2 threshold instead of propagating NA.
wessamplelevel = wessamplelevel %>%
  mutate(
    purity = ifelse(is.na(purity), -1, purity),
    LOW_PURITY = purity <= 0.2
  )

# Patients with at least one low-purity sample.
wessamplelevel %>%
  filter(LOW_PURITY) %>%
  select(PTN) %>%
  distinct()

# NOTE(review): the original called `Private()`, which is not defined in this
# file, base R, dplyr or data.table. The cohort counts further down use
# `length(unique(...))`, so `unique()` is used here -- confirm intent.
length(unique(wessamplelevel$PTN))
table(wessamplelevel$LOW_PURITY)
table(wessamplelevel$purity)
#14 are low purity
# Alignment QC: inspect per-sample median target coverage.
wesqc_align = fread('/Users/chavans/juno/work/ccs/ccs_wes/Proj_07871_DFLOQ/Result/qc/alignment_qc.txt')
dim(wesqc_align)
head(wesqc_align) # Sample T
#filter(wesqc_align, MeanTargetCoverage <=50)

# Rows whose Sample matches '_N0' with median coverage <= 45.
# Exclude this pair s_C_001390_N001_d s_C_001390_M001_d, N coverage is 18.
filter(wesqc_align, Sample %like% '_N0', MedianTargetCoverage <= 45)

# Rows whose Sample does not match '_N0' with median coverage <= 70.
# Exclude s_C_001390_M001_d is 52.
filter(wesqc_align, !(Sample %like% '_N0'), MedianTargetCoverage <= 70)

# The two summaries below were pasted into the script as a console transcript
# (prompt characters and printed output), which does not parse. They are
# restored as runnable code, with the recorded results kept as comments.

# Mean of MedianTargetCoverage over '_N0' rows; recorded result: cov = 86.59259
wesqc_align %>%
  filter(Sample %like% '_N0') %>%
  select(MedianTargetCoverage) %>%
  summarise(cov = mean(MedianTargetCoverage))

# Mean of MedianTargetCoverage over the remaining rows; recorded result: cov = 151.66
wesqc_align %>%
  filter(!(Sample %like% '_N0')) %>%
  select(MedianTargetCoverage) %>%
  summarise(cov = mean(MedianTargetCoverage))
# Concordance QC table: print its shape and first rows, then list every row
# whose Concordance is below 90.
wesqc_concor = fread("/Users/chavans/juno/work/ccs/ccs_wes/Proj_07871_DFLOQ/Result/qc/concordance_qc.txt")
dim(wesqc_concor)
head(wesqc_concor) # Sample T_N
wesqc_concor %>%
  filter(Concordance < 90) # s_C_001601_P001_d__s_C_001601_N002_d 92.03

# Contamination QC table: same inspection, then list every row whose
# Contamination is 5 or more.
wesqc_contam = fread("/Users/chavans/juno/work/ccs/ccs_wes/Proj_07871_DFLOQ/Result/qc/contamination_qc.txt")
dim(wesqc_contam)
head(wesqc_contam) # Sample
wesqc_contam %>%
  filter(Contamination >= 5) # s_C_001601_P001_d__s_C_001601_N002_d T s_C_001601_P001_d 5.518
# Somatic mutation MAF: load, then print its dimensions and first rows.
wesmaf = fread("/Users/chavans/juno/work/ccs/ccs_wes/Proj_07871_DFLOQ/Result/somatic/mut_somatic.maf")
dim(wesmaf)
head(wesmaf) # Tumor_Sample_Barcode

# WES subset ID list: load, then print its dimensions and first rows.
wesids = fread("/Users/chavans/juno/work/ccs/chavans/res/bladder_kdm6a/WES_subset_ids.txt")
dim(wesids)
head(wesids) # SampleID
# Keep samples that are not flagged LOW_PURITY and whose `sample` ID does not
# contain s_C_001601 or s_C_001390 (the IDs called out in the QC comments in
# this script for contamination and low coverage respectively).
wessamplelevel_pure = wessamplelevel %>%
  filter(
    !LOW_PURITY,
    !(sample %like% 's_C_001601'),
    !(sample %like% 's_C_001390')
  )

# Distinct patient keys remaining after the filter.
# NOTE(review): the original called `Private()`, which is not defined in this
# file, base R, dplyr or data.table; `unique()` matches the
# `length(unique(...))` usage elsewhere in the script -- confirm intent.
unique(wessamplelevel_pure$PTN)
# Final WES cohort sheet, restricted to rows with WES == "Y".
# The unselected table is kept in `wes_cohort_full` because the counts below
# read columns (`Matched Pair #`, `DMP Patient ID`, `IMPACT Sample ID`) that
# the select() drops; taken from the selected table, `$` returns NULL for
# them and every count would be 0 instead of the values noted in the comments.
wes_cohort_full = fread('~/kdm6a_paper/final_WES_cohort_kdm6a.txt') %>%
  filter(WES == "Y")

# `wes_cohort` keeps the same three columns as before.
wes_cohort = wes_cohort_full %>%
  select(`CMO WES data`, `CMO_paths`, `CMO Sample ID (WES)`)
names(wes_cohort)

length(unique(wes_cohort_full$`Matched Pair #`)) #25
length(unique(wes_cohort_full$`DMP Patient ID`)) #25
length(unique(wes_cohort_full$`IMPACT Sample ID`)) #51
length(unique(wes_cohort$`CMO WES data`)) #11 batches
## Multiple samples
# Patient keys (PTN) that have more than two rows in wessamplelevel.
# The original `dplyr::dplyr::summarise` does not parse; the namespace prefix
# is written once.
multi = wessamplelevel %>%
  group_by(PTN) %>%
  dplyr::summarise(total = n()) %>%
  filter(total > 2) %>%
  select(PTN)