-
Notifications
You must be signed in to change notification settings - Fork 2
/
format_data.R
50 lines (40 loc) · 1.15 KB
/
format_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(data.table)
library(magrittr)
# Read the raw CSV into a plain data.frame (data.table = FALSE).
# The file is read without a header row, so fread assigns default column
# names; the real names are set right after this call.
# NOTE(review): setwd() to a machine-specific path makes the script
# non-portable. It is kept because the relative path below depends on it.
setwd("D://Autism/")
dat <- fread(
  "unorganized/israeli_data_utf8.manual_atc.csv",
  sep = ",",
  header = FALSE,
  na.strings = c("NA", ""), # literal "NA" and empty fields both become NA
  stringsAsFactors = FALSE,
  showProgress = TRUE,
  fill = TRUE, # rows with too few fields are padded instead of failing
  data.table = FALSE
)
# Assign the 41 column names by position (the CSV was read without a header).
# Position ranges are noted on the right because later code selects columns
# by index. The first name is the literal string "NA", not a missing value.
names(dat) <- c(
  "NA", "CID", "FID", "MID", "as.x",                    #  1 -  5
  "drugname", "pills", "boxes", "atc", "dddcode",       #  6 - 10
  "pillspaid", "datemp", "datrp", "dob", "sex",         # 11 - 15
  "dod", "ses", "sid", "sibtype.x", "mdob.y",           # 16 - 20
  "sibtype.y", "pdob.y", "mage", "page", "datediff",    # 21 - 25
  "exp", "atc2", "preg", "dxdate", "as.y",              # 26 - 30
  "dxdaty", "dxdatm", "dxdatd", "dxcode", "dxdesc",     # 31 - 35
  "asd", "fudate", "min", "timefu", "doby",             # 36 - 40
  "atc_manual"                                          # 41
)
# Keep the first row for each distinct combination of CID, FID, MID, datemp
# and atc_manual (columns 2, 3, 4, 12 and 41 of dat).
dat.2 <- dat[
  !duplicated(dat[, c("CID", "FID", "MID", "datemp", "atc_manual")]),
]

# Partition the de-duplicated rows by the value of the asd column.
dat.sp <- split(dat.2, f = dat.2$asd)

# NOTE(review): the groups are taken by position, i.e. by the sorted order of
# the distinct values in `asd`. The first group is treated as ASD and the
# second as unaffected -- confirm this matches how `asd` is coded.
asd <- dat.sp[[1]]
# which() discards rows whose preg is NA. Indexing with the bare logical
# vector would return an all-NA row for each of them.
asd.preg <- asd[which(asd$preg == 1), ]
nrow(asd)

unaffected <- dat.sp[[2]]
un.preg <- unaffected[which(unaffected$preg == 1), ]
nrow(unaffected)

# Free the large intermediates that are no longer needed.
rm(dat.sp, dat)
# Distinct CID / FID / MID combinations (columns 2, 3 and 4) in each group.
unaffected.cases <- unique(unaffected[, c("CID", "FID", "MID")])
asd.cases <- unique(asd[, c("CID", "FID", "MID")])