Skip to content

Commit

Permalink
upd training
Browse files Browse the repository at this point in the history
  • Loading branch information
lisasivak committed Apr 1, 2024
1 parent 8f8fdbf commit 08a2d02
Showing 1 changed file with 25 additions and 2 deletions.
27 changes: 25 additions & 2 deletions training.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,34 @@
#' Fit a LASSO-penalised logistic regression and save it to disk
#'
#' Merges features and outcome on `nomem_encr`, picks the penalty `lambda`
#' by 10-fold cross-validation, refits the model at that `lambda`, and writes
#' the fitted object to "model.rds" in the current working directory.
#'
#' Every merged column other than `nomem_encr` and `new_child` is used as a
#' feature.
#'
#' @param cleaned_df Data frame of features; must contain `nomem_encr`.
#' @param outcome_df Data frame containing `nomem_encr`; expected to hold the
#'   outcome column `new_child`.
#' @return Invisibly `NULL` (the value of `saveRDS()`); called for the side
#'   effect of writing "model.rds".
train_save_model <- function(cleaned_df, outcome_df) {
  # Combine cleaned_df and outcome_df to match on ID (inner join: IDs missing
  # from either input are dropped)
  model_df <- merge(cleaned_df, outcome_df, by = "nomem_encr")

  # glmnet requires a matrix, whereas merge() returns a data.frame.
  # NOTE(review): as.matrix() yields a character matrix if any column is
  # non-numeric -- presumably all columns are numeric here; confirm upstream.
  model_df <- as.matrix(model_df)

  # Features: everything except the identifier and the outcome.
  # drop = FALSE keeps X a matrix even when only one feature column remains.
  is_feature <- !(colnames(model_df) %in% c("nomem_encr", "new_child"))
  X <- model_df[, is_feature, drop = FALSE]

  # Outcome only; indexing by name fails loudly if the column is absent
  y <- model_df[, "new_child"]

  # LASSO regression: hyperparameter tuning via cross-validation to retrieve
  # the ideal lambda. The seed fixes the random fold assignment so the chosen
  # lambda is reproducible.
  set.seed(1)
  CV <- cv.glmnet(
    x = X,
    y = y,
    family = "binomial",
    nfolds = 10,
    standardize = FALSE # features are passed to glmnet without rescaling
  )
  optimal_lambda_test <- CV$lambda.min

  # Run model with optimal lambda
  model <- glmnet(
    x = X,
    y = y,
    family = "binomial",
    lambda = optimal_lambda_test,
    standardize = FALSE
  )

  # Save the model
  saveRDS(model, "model.rds")
}

0 comments on commit 08a2d02

Please sign in to comment.