BDHU · BDHU · Dec 17, 2025
diff --git a/model_slo.py b/model_slo.py
@@ -0,0 +1,73 @@
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split, RandomizedSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
+from sklearn.pipeline import Pipeline
+from scipy.stats import uniform, randint
+from xgboost import XGBClassifier
+import joblib
+
+# Load the dataset: the "violate" column is the classification target and
+# every remaining column is used as a feature.
+df = pd.read_csv("expanded_pipeline_data_slo.csv")
+
+y = df["violate"]
+x = df.drop(columns=["violate"])
+
+# Hold out 20% of the rows for the final evaluation. Stratifying on the target
+# keeps the proportion of each `violate` class the same in the train and test
+# splits, so the reported metrics are not skewed by an unlucky draw of a rare
+# class. (Stratification requires at least two rows per class.)
+x_train, x_test, y_train, y_test = train_test_split(
+    x, y, test_size=0.2, random_state=42, stratify=y
+)
+
+# Baseline model: logistic regression fitted on standardized features.
+linear_pipeline = Pipeline(
+    steps=[
+        ("scaler", StandardScaler()),
+        ("logistic", LogisticRegression(random_state=42)),
+    ]
+)
+
+# Pipeline.fit returns the fitted pipeline, so predicting can be chained onto it.
+lr_preds = linear_pipeline.fit(x_train, y_train).predict(x_test)
+
+# XGBoost pipeline, tuned with a randomized hyper-parameter search.
+#
+# Search space. The "xgb__" prefix routes each parameter to the "xgb" step of
+# the pipeline. Note the scipy conventions: uniform(loc, scale) samples from
+# [loc, loc + scale] and randint(low, high) samples integers from [low, high).
+# So subsample / colsample_bytree range over [0.5, 1.0] and learning_rate over
+# [0.005, 0.305].
+# NOTE(review): if 0.3 was meant as the upper bound for learning_rate, the
+# scale should be 0.295 - confirm the intent.
+param_dist = dict(
+    xgb__n_estimators=randint(100, 500),
+    xgb__max_depth=randint(3, 15),
+    xgb__learning_rate=uniform(0.005, 0.3),
+    xgb__subsample=uniform(0.5, 0.5),
+    xgb__colsample_bytree=uniform(0.5, 0.5),
+    xgb__reg_alpha=uniform(0, 1),
+    xgb__reg_lambda=uniform(0, 1),
+)
+
+xg_pipeline = Pipeline(
+    steps=[("xgb", XGBClassifier(objective="binary:logistic", random_state=42))]
+)
+
+# 30 sampled candidates, each scored by F1 with 5-fold cross-validation.
+random_search = RandomizedSearchCV(
+    estimator=xg_pipeline,
+    param_distributions=param_dist,
+    n_iter=30,
+    scoring="f1",
+    cv=5,
+    random_state=42,
+    n_jobs=-1,
+    verbose=1,
+)
+
+# fit() returns the search object itself; best_estimator_ is the pipeline with
+# the best cross-validated score (refit on the full training split by default).
+best_model = random_search.fit(x_train, y_train).best_estimator_
+xgb_preds = best_model.predict(x_test)
+
+
+def eval_model(y_true, y_pred, model_name):
+    """Print standard classification metrics for one model's predictions.
+
+    Each scikit-learn metric is called with its default arguments, computed
+    and printed in turn: accuracy, precision, recall, F1, then the full
+    classification report.
+
+    Args:
+        y_true: Ground-truth labels.
+        y_pred: Labels predicted by the model, aligned with ``y_true``.
+        model_name: Display name used in the report header.
+
+    Returns:
+        None. The report is written to stdout.
+    """
+    print(f"Evaluation metrics for {model_name}:")
+
+    accuracy = accuracy_score(y_true, y_pred)
+    print(f"Accuracy: {accuracy}")
+
+    precision = precision_score(y_true, y_pred)
+    print(f"Precision: {precision}")
+
+    recall = recall_score(y_true, y_pred)
+    print(f"Recall: {recall}")
+
+    f1 = f1_score(y_true, y_pred)
+    print(f"F1 Score: {f1}")
+
+    report = classification_report(y_true, y_pred)
+    print(f"Classification Report:\n{report}\n")
+
+
+# Report hold-out metrics for both models, baseline first.
+eval_model(y_true=y_test, y_pred=lr_preds, model_name="Logistic Regression")
+eval_model(y_true=y_test, y_pred=xgb_preds, model_name="XGBoost")
+
+# Persist both fitted pipelines to disk with joblib.
+joblib.dump(value=best_model, filename="xgb_pipeline.joblib")
+joblib.dump(value=linear_pipeline, filename="linear_pipeline.joblib")