diff --git a/training.R b/training.R
index 08283f1..0cb6a28 100644
--- a/training.R
+++ b/training.R
@@ -1,11 +1,34 @@
 train_save_model <- function(cleaned_df, outcome_df) {
-  set.seed(1) # not useful here because logistic regression deterministic
+  # Combine cleaned_df and outcome_df to match on ID
   model_df <- merge(cleaned_df, outcome_df, by = "nomem_encr")
-  model <- glm(new_child ~ age + mean_income_imp, family = "binomial", data = model_df)
+  # glmnet requires a matrix; merge() returns a data.frame
+  model_df <- as.matrix(model_df)
+
+  # Features without the outcome and the identifier
+  X <- model_df[, !(colnames(model_df) %in% c("nomem_encr", "new_child"))]
+  # Outcome only
+  y <- model_df[, colnames(model_df) == "new_child"]
+
+  # LASSO regression
+  # Cross-validation to retrieve the optimal lambda
+  # (hyperparameter tuning)
+  set.seed(1)
+  CV <- cv.glmnet(x = X,
+                  y = y,
+                  family = "binomial",
+                  nfolds = 10, standardize = FALSE)
+  optimal_lambda_test <- CV$lambda.min
+
+  # Refit the model with the optimal lambda
+  model <- glmnet(x = X,
+                  y = y,
+                  family = "binomial",
+                  lambda = optimal_lambda_test, standardize = FALSE)
 
 
 
 
   # Save the model
   saveRDS(model, "model.rds")
+
 }
\ No newline at end of file
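
Because the patch replaces the glm() fit with a glmnet fit at the cross-validated lambda, any downstream prediction code must pass a numeric feature matrix via newx rather than a data frame. The sketch below shows how the saved model could be loaded and applied; it is not part of the patch. It assumes the glmnet package is attached, that new_cleaned_df is a hypothetical data frame with the same feature columns as the training data, and that all merged columns are numeric (as.matrix() on a data frame with character columns would produce a character matrix that glmnet cannot use).

# Minimal usage sketch, assuming a hypothetical new_cleaned_df with the
# same feature columns as the training data and only numeric columns.
library(glmnet)

model <- readRDS("model.rds")

# Drop the identifier and outcome columns, mirroring the training step
new_X <- as.matrix(new_cleaned_df[, !(colnames(new_cleaned_df) %in% c("nomem_encr", "new_child"))])

# type = "response" returns predicted probabilities for a binomial fit;
# the model was fit at a single lambda, so no s = argument is needed
predictions <- predict(model, newx = new_X, type = "response")

Saving only the single-lambda refit keeps model.rds small, at the cost of discarding the full cv.glmnet object; if the cross-validation curve is needed later, the CV object would have to be saved separately.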