Skip to content

Commit

Permalink
Update training.R
Browse files Browse the repository at this point in the history
  • Loading branch information
lisasivak authored Apr 23, 2024
1 parent 553771f commit 4b5a7bc
Showing 1 changed file with 11 additions and 26 deletions.
37 changes: 11 additions & 26 deletions training.R
Original file line number Diff line number Diff line change
@@ -1,34 +1,19 @@
train_save_model <- function(cleaned_df, outcome_df) {
  # Trains a logistic regression model on the cleaned data and saves the
  # fitted model to "model.rds" (relative to the current working directory).
  #
  # Parameters:
  # cleaned_df (dataframe): The cleaned data from clean_df function to be used
  #   for training the model. Must contain the columns "nomem_encr" and "age".
  # outcome_df (dataframe): The data with the outcome variable (e.g., from
  #   PreFer_train_outcome.csv or PreFer_fake_outcome.csv). Must contain the
  #   columns "nomem_encr" and "new_child".
  #
  # Returns:
  # The value of saveRDS(); the function is called for its side effect of
  # writing the model file.

  ## This script contains a bare minimum working example
  set.seed(1) # not useful here because logistic regression deterministic

  # Both inputs must carry the identifier, otherwise they cannot be matched.
  id_col <- "nomem_encr"
  if (!(id_col %in% colnames(cleaned_df))) {
    stop("cleaned_df must contain the column '", id_col, "'", call. = FALSE)
  }
  if (!(id_col %in% colnames(outcome_df))) {
    stop("outcome_df must contain the column '", id_col, "'", call. = FALSE)
  }

  # Combine cleaned_df and outcome_df to match on ID
  model_df <- merge(cleaned_df, outcome_df, by = id_col)

  # Check the model variables explicitly: a formula variable that is missing
  # from `data` would otherwise be looked up in the calling environment, and
  # the model could silently be fitted on an unrelated object.
  missing_cols <- setdiff(c("new_child", "age"), colnames(model_df))
  if (length(missing_cols) > 0) {
    stop(
      "Merged data is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Logistic regression model
  model <- glm(new_child ~ age, family = "binomial", data = model_df)

  # Save the model
  saveRDS(model, "model.rds")
}

0 comments on commit 4b5a7bc

Please sign in to comment.