-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.R
73 lines (57 loc) · 2.6 KB
/
model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
## final
library(readr)

# Load the train and test feature tables plus the `final_data` table.
# `final_data` is only loaded here; nothing further down in this script
# reads it.
X_train_updt <- read_csv("~/ownCloud/Scripts/computefest/data/X_train_updt.csv")
X_test_updt <- read_csv("~/ownCloud/Scripts/computefest/data/X_test_updt.csv")
final_data <- read_csv("~/ownCloud/Scripts/computefest/data/final_data.csv")

# Drop the `X1` column from the training table.
# NOTE(review): `X1` is presumably the auto-generated name of an unnamed
# first CSV column - confirm against the data files.
X_train_updt$X1 <- NULL

# Stack the training rows on top of the test rows. `X1` is excluded from the
# test side as well, so both inputs have matching columns whether or not the
# test file carries that column; `X_test_updt` itself is left unmodified.
X_complete <- rbind(
  X_train_updt,
  X_test_updt[setdiff(names(X_test_updt), "X1")]
)

# Keep the training-row identifiers before the identifier column is removed
# from the modelling frame (not used again in this script).
ids <- X_train_updt$Doctor.Identifier

# Treat the specialty category as categorical and drop the columns that are
# not used as model inputs.
X_complete$Specialty.Category <- as.factor(X_complete$Specialty.Category)
X_complete$Provider.Type <- NULL
X_complete$Freq <- NULL
X_complete$Doctor.Identifier <- NULL
# Drop every per-condition percentage column from the combined frame.
# Names that are not present in the frame are simply skipped by the
# NULL assignment.
percent_columns <- c(
  "Percent.Alzheimer.s.Disease.or.Dementia",
  "Percent.Atrial.Fibrillation",
  "Percent.Asthma",
  "Percent.Cancer",
  "Percent.Heart.Failure",
  "Percent.Chronic.Kidney.Disease",
  "Percent.Chronic.Obstructive.Pulmonary.Disease",
  "Percent.Depression",
  "Percent.Diabetes",
  "Percent.Hyperlipidemia",
  "Percent.Hypertension",
  "Percent.Ischemic.Heart.Disease",
  "Percent.Osteoporosis",
  "Percent.Rheumatoid.Arthritis.or.Osteoarthritis",
  "Percent.Schizophrenia.or.Other.Psychotic.Disorders",
  "Percent.Stroke"
)
for (column_name in percent_columns) {
  X_complete[[column_name]] <- NULL
}
# Derived ratio features, computed row by row from the payment totals:
#   AllowPayFrac = allowed amount / payment amount
#   PayFrac      = payment amount / standardized payment amount
X_complete[["AllowPayFrac"]] <- with(
  X_complete,
  Total.Allowed.Amount / Total.Payment.Amount
)
X_complete[["PayFrac"]] <- with(
  X_complete,
  Total.Payment.Amount / Total.Standardized.Payment.Amount
)
library(h2o)
h2o.init()

# Push the combined frame to H2O and use all of its columns as inputs.
h2o.df <- as.h2o(X_complete)
feature_names <- colnames(h2o.df)

# Train an autoencoder with hidden layers of 50, 10, 2 and 50 units.
model_nn <- h2o.deeplearning(
  x = feature_names,
  training_frame = h2o.df,
  model_id = "model_nn3",
  autoencoder = TRUE,
  activation = "TanhWithDropout",
  hidden = c(50, 10, 2, 50),
  epochs = 100,
  l1 = 0.001,
  seed = 43,
  # Reproducible mode is left off (the original note flags it as slow).
  reproducible = FALSE,
  ignore_const_cols = FALSE
)

# Extract the activations of hidden layer 3 and append them to the inputs.
# NOTE(review): layer 3 is presumably the 2-unit bottleneck - confirm the
# layer indexing used by h2o.deepfeatures.
feats <- h2o.deepfeatures(object = model_nn, data = h2o.df, layer = 3)
X_iso <- cbind(X_complete, as.data.frame(feats))
library(isofor)

# Fit an isolation forest on the model inputs plus the deep features.
# NOTE(review): the positional arguments 100 and 40 are presumably the number
# of trees and the per-tree sample size - confirm against the iForest
# signature.
mod <- iForest(X = X_iso, 100, 40)

# AllowPayFrac and PayFrac are already present on X_complete: they are
# derived earlier in this script from the combined frame's own rows. They are
# deliberately not re-assigned here from X_train_updt, because that table
# holds only the training rows and so does not line up row-for-row with the
# stacked train + test frame.