-
Notifications
You must be signed in to change notification settings - Fork 4
/
AKI project SIR Reshaping
82 lines (67 loc) · 3.49 KB
/
AKI project SIR Reshaping
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Prior preprocessing: in Excel, removed # symbols, removed leading tabs from indented rows, and edited rows with a value in the units column.
#Remove codes 443J1,443J2,443J200,44J3z00 which do not contain numeric data
# Load the patient and test extracts, merge them, derive age at entry and keep
# records for patients aged over 15 that were entered in 2009 or later.
#
# NOTE(review): the working directory is machine-specific. It is kept because
# the rest of the script reads and writes files relative to it.
setwd("c:/Users/mqbpjhr4/Documents")

# library() fails loudly if lubridate is missing; require() would only return
# FALSE and let the script fail later at the first year() call.
library(lubridate)

# stringsAsFactors = TRUE is requested explicitly: later steps regroup codes
# and units through levels(...) <-, which only works on factor columns, and
# R >= 4.0 no longer converts strings to factors by default.
Patients <- read.table("Patients.csv", header = TRUE, sep = ",",
                       stringsAsFactors = TRUE)
tests <- read.table("tests.csv", header = TRUE, sep = ",",
                    stringsAsFactors = TRUE)

# Full outer join on patient id: unmatched rows from either side are kept.
SIR <- merge(Patients, tests, all = TRUE, by = "PatID")

# EntryDate is stored as dd/mm/YYYY text; parse it once and reuse the year.
entry_year <- as.numeric(
  year(as.Date(as.character(SIR$EntryDate), format = "%d/%m/%Y"))
)
SIR$Age <- entry_year - SIR$Year_Of_Birth

# which() drops rows whose condition is NA, matching subset() semantics.
keep_row <- SIR$Age > 15 & entry_year >= 2009
Over <- SIR[which(keep_row), , drop = FALSE]

# Records sourced from "salfordt" are labelled Hospital, everything else GP.
Over$location <- ifelse(Over$Source == "salfordt", "Hospital", "GP")
# Print the current ReadCode levels so the index ranges used below can be
# checked by eye against the data.
levels(Over$ReadCode)
#Group redundant read codes/rubrics
# Each assignment relabels the levels at the given positions; levels that end
# up with the same label are merged, which renumbers the levels after them.
# The statements therefore have to run exactly once and in this order.
# NOTE(review): the positions are specific to the set of codes present in this
# extract -- re-check them against the printed levels if the input changes.
levels(Over$ReadCode)[1:23] <- "dialysisorckd"
levels(Over$ReadCode)[3:7] <- "eGFR"
levels(Over$ReadCode)[4:5] <- "transplant"
levels(Over$ReadCode)[2] <- "creatinine"
levels(Over$ReadCode)[5:12] <- "dialysisorckd"
# Frequency of each regrouped code, as a sanity check.
table(Over$ReadCode)
# Puts the columns of Over on the search path so they can be referred to by
# bare name. NOTE(review): attach() exposes a copy taken at this point; later
# changes to Over are not reflected in the attached columns.
attach(Over)
# Drop all records entered on or after commencement of CKD/dialysis/transplant.
#
# The endpoint is the earliest EntryDate on which a patient has a record coded
# "dialysisorckd" or "transplant". It is looked up per patient (PatID) so that
# every record of that patient can be compared against it; patients with no
# such record get a missing endpoint and keep all of their records.
#
# Columns added to Over:
#   endpoint  - Date of the patient's first endpoint record (NA if none)
#   date_diff - days from the record's EntryDate to the endpoint
#               (positive = record precedes the endpoint)
#
# EntryDate itself is left untouched (dd/mm/YYYY text) because later steps
# parse it again.
entry_date <- as.Date(as.character(Over$EntryDate), format = "%d/%m/%Y")

# %in% never yields NA, so records with a missing ReadCode are simply
# treated as non-endpoint records.
is_endpoint <- Over$ReadCode %in% c("dialysisorckd", "transplant") &
  !is.na(entry_date)

# One row per patient holding the earliest endpoint date: sort by date, then
# keep the first occurrence of each PatID.
endpoint_records <- data.frame(
  PatID = Over$PatID[is_endpoint],
  date = entry_date[is_endpoint]
)
endpoint_records <- endpoint_records[order(endpoint_records$date), ,
                                     drop = FALSE]
first_endpoint <- endpoint_records[!duplicated(endpoint_records$PatID), ,
                                   drop = FALSE]

Over$endpoint <- first_endpoint$date[match(Over$PatID, first_endpoint$PatID)]
Over$date_diff <- as.numeric(Over$endpoint - entry_date)

# Keep records of patients without an endpoint (date_diff is NA) and records
# strictly before the endpoint. The endpoint records themselves (date_diff of
# 0 or less) are removed.
Over <- Over[is.na(Over$date_diff) | Over$date_diff > 0, ]
#This gives you a subset of records for patients before CKD, transplant or dialysis
# Check which units have been used to record the measurements and collapse
# equivalent unit labels into a common name.
# NOTE(review): the original note here mentioned converting creatinine from
# mg/dL to umol/L if needed; no mg/dL handling is visible in this section --
# confirm whether it is required for this extract.
#
# The level positions used below are specific to the unit labels present in
# this extract, and each relabelling can merge levels and renumber the ones
# after it, so the statements must run once, in this order. The table()/head()/
# tail()/unique() calls are interactive inspections made between the steps.
table(Over$Units)
levels(Over$Units)
# Inspect records whose unit is recorded as "%".
head(Over[Over$Units=="%",])
levels(Over$Units)[1]<-"mLmin173m2"
levels(Over$Units)[13:22]<-"mLmin173m2"
table(Over$Units)
# Inspect records whose unit is recorded as "/min".
tail(Over[Over$Units=="/min",])
levels(Over$Units)[4]<-"mLmin173m2"
levels(Over$Units)[8]<-"mLmin173m2"
table(Over$Units)
# List the test descriptions recorded with unit "/mL".
unique(Over$Desc[Over$Units=="/mL"])
levels(Over$Units)[4]<-"mLmin173m2"
levels(Over$Units)[7]<-"umol/L"
# Cross-tabulate test type against unit to verify the regrouping.
table(Over$ReadCode,Over$Units)
# Derive the two result columns exported later:
#   stcreat - creatinine value; taken as-is when the unit is "umol/L" or
#             "None", otherwise divided by 1000
#   mdrd    - eGFR value; taken as-is when the unit is "mLmin173m2" or "None",
#             otherwise divided by 1440
# Both columns are character: the value as text on rows of the matching test
# type and the literal string "NA" on all other rows.
# NOTE(review): the divisors 1000 and 1440 are carried over unchanged; confirm
# they match the non-standard units actually present in the data.
creat_in_std_units <- Over$Units == "umol/L" | Over$Units == "None"
creat_value <- ifelse(creat_in_std_units, Over$Value, Over$Value / 1000)
Over$stcreat <- ifelse(Over$ReadCode == "creatinine", paste(creat_value),
                       paste("NA"))

# The column is Value (capital V). The previous Over$value evaluated to NULL,
# so every eGFR not already in standard units silently became NA.
egfr_in_std_units <- Over$Units == "mLmin173m2" | Over$Units == "None"
egfr_value <- ifelse(egfr_in_std_units, Over$Value, Over$Value / 1440)
Over$mdrd <- ifelse(Over$ReadCode == "eGFR", paste(egfr_value), paste("NA"))
# Check for and delete delayed duplicates: the same result for the same
# patient and test type entered more than once.
#
# Produces:
#   d  - Over without repeats of (PatID, ReadCode, Value) within one calendar
#        month of entry
#   d2 - creatinine records of d
#   e  - Over without repeats of (PatID, ReadCode, Value) at any time
#   e2 - creatinine records of e
#   df - creatinine records of d for female patients
# Reference the column through Over rather than a bare attached name, so the
# check reflects the current data frame.
class(Over$EntryDate)
Over$EntryDate <- as.Date(Over$EntryDate, format = "%d/%m/%Y")
MONTH <- format(Over$EntryDate, "%m")
YEAR <- format(Over$EntryDate, "%Y")
Over$EntryPeriod <- paste(MONTH, YEAR)
library(lubridate)

# duplicated() must see the key columns together. Combining separate
# duplicated() calls per column with & flags any row whose patient, value and
# period have each been seen before somewhere, even on different rows, and so
# discards genuine results. ReadCode is part of the key so that a creatinine
# and an eGFR result that happen to share a value are not treated as repeats.
repeat_in_period <- duplicated(
  Over[, c("PatID", "ReadCode", "Value", "EntryPeriod")]
)
d <- Over[!repeat_in_period, ]
d2 <- d[which(d$ReadCode == "creatinine"), ]

repeat_any_time <- duplicated(Over[, c("PatID", "ReadCode", "Value")])
e <- Over[!repeat_any_time, ]
# Filter e by its own ReadCode column (it was previously filtered with d's).
e2 <- e[which(e$ReadCode == "creatinine"), ]

df <- d[which(d$Sex == "F" & d$ReadCode == "creatinine"), ]
# Write an input file for STATA with headings complementary to the Aberdeen
# algorithm.
#
# One line per row of d2, in the column order
#   studyid, dos, stcreat, mdrd, location_code, age, sex
# Every line, including the header, ends with a trailing comma; that layout is
# kept so the file is laid out exactly as before for whatever imports it.
# The number of lines follows nrow(d2) instead of a hard-coded record count,
# so no all-NA padding rows are written and no records are cut off.
SIRinput <- file("SIR_Rinput.csv", open = "w")
tryCatch(
  {
    writeLines("studyid,dos,stcreat,mdrd,location_code,age,sex,",
               con = SIRinput)
    # paste() on zero-length columns would still emit one line of bare commas,
    # so skip the body entirely when there is nothing to export.
    if (nrow(d2) > 0) {
      writeLines(
        paste(d2$PatID, d2$EntryDate, d2$stcreat, d2$mdrd, d2$Source, d2$Age,
              d2$Sex, "", sep = ","),
        con = SIRinput
      )
    }
  },
  # Always release the connection, even if writing fails part-way.
  finally = close(SIRinput)
)