
Commit

adding all the regression models
ShubhamSupekar committed Oct 18, 2024
1 parent 547c441 commit 688b5ff
Showing 8 changed files with 248 additions and 91 deletions.
Binary file modified __pycache__/main.cpython-312.pyc
34 changes: 26 additions & 8 deletions main.py
@@ -6,7 +6,7 @@
from pydantic import BaseModel
from starlette.requests import Request
import pandas as pd
import model.LinearRegression.LinearRegression as lr
import model.Regression as lr
import numpy as np
from sklearn.linear_model import LinearRegression

Expand Down Expand Up @@ -98,8 +98,6 @@ async def remove_columns(request: Request, columns_to_remove: List[str] = Form(.
    })




@app.post("/train")
async def train_model(request: Request, predictor_column: str = Form(...), dataset: str = Form(...)):
    global df  # Access the global df variable
@@ -119,14 +117,34 @@ async def train_model(request: Request, predictor_column: str = Form(...), datas
    })

    # Train the model using the selected predictor column
    best_features, best_r2 = lr.StartTraining(predictor_column, df)
    results = lr.StartTraining(predictor_column, df)

    # Prepare the output for each model in a list format
    model_results = [
        {
            "model": result[0],
            "features": result[1],
            "r2_score": result[2]
        } for result in results
    ]

    # Sort model results by R² score in decreasing order
    model_results.sort(key=lambda x: x['r2_score'], reverse=True)

    # Find the best model based on R² score (first in sorted list)
    best_model_result = model_results[0] if model_results else None

    # Prepare a list of models excluding the best model for display
    other_models = model_results[1:] if model_results else []

    # Return the template with the dataset, best features, and R² score
    # Return the template with the best model and all models
    return templates.TemplateResponse("index.html", {
        "request": request,
        "predict_column":predictor_column,
        "best_features": best_features,
        "best_r2": best_r2,
        "predict_column": predictor_column,
        "best_model": best_model_result['model'] if best_model_result else None, # Best model name
        "best_features": best_model_result['features'] if best_model_result else None, # Best features
        "best_r2": best_model_result['r2_score'] if best_model_result else None, # Best R² score
        "other_models": other_models, # All other models
        "dataset_used": dataset, # Show which dataset was used in training
        "selected_dataset": dataset
    })
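
For reference, a minimal standalone sketch of the sorting and best/other split this route now performs, assuming lr.StartTraining returns (model_name, selected_features, r2_score) tuples as implemented in model/Regression.py below; the tuple values here are made-up examples, not real training output:

results = [
    ("RandomForest", ["feat_a", "feat_b", "feat_c"], 0.84),
    ("LinearRegression", ["feat_a", "feat_b"], 0.71),
    ("Lasso", ["feat_b"], 0.65),
]

# Same reshaping as in the /train route: tuples -> dicts, sorted by R²
model_results = [
    {"model": name, "features": feats, "r2_score": r2}
    for name, feats, r2 in results
]
model_results.sort(key=lambda x: x["r2_score"], reverse=True)

best_model_result = model_results[0] if model_results else None   # highest R²
other_models = model_results[1:] if model_results else []         # remaining models, already sorted

print(best_model_result["model"])                  # RandomForest
print([m["model"] for m in other_models])          # ['LinearRegression', 'Lasso']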
55 changes: 0 additions & 55 deletions model/LinearRegression/LinearRegression.py

This file was deleted.

143 changes: 143 additions & 0 deletions model/Regression.py
@@ -0,0 +1,143 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.feature_selection import RFE
from joblib import Parallel, delayed
import torch
import torch.nn as nn
import torch.optim as optim

# PyTorch-based Neural Network for regression
class NeuralNetworkRegressor(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetworkRegressor, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Load dataset function
def LoadDataset(df):
print("Available columns in the dataset:", df.columns)
return df.columns

# Start training with optimized models and RFE for feature selection
def StartTraining(target_column, df):
    # Drop rows with missing values
    df = df.dropna()

    # Remove the target column from the list of features
    df_features = df.drop(target_column, axis=1)

    # List of all features
    filtered_columns = df_features.columns.tolist()

    print(f"\nInitial Features: {filtered_columns}")

    # Perform RFE with multiple models
    results = select_features_with_rfe(filtered_columns, target_column, df)

    return results  # Return the results list

# Feature selection using Recursive Feature Elimination (RFE) for different models
def select_features_with_rfe(features_list, target, df):
    results = []  # Store results in a list

    # Available models, including PyTorch Neural Network
    models = {
        'LinearRegression': LinearRegression(),
        'Ridge': Ridge(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'SVR': SVR(kernel='linear'),
        'DecisionTree': DecisionTreeRegressor(),
        'RandomForest': RandomForestRegressor(n_estimators=50, max_depth=5),
        'GradientBoosting': GradientBoostingRegressor(n_estimators=50, max_depth=3),
        'PyTorchNN': NeuralNetworkRegressor
    }

    # Parallelize feature selection and model evaluation
    results = Parallel(n_jobs=-1)(delayed(evaluate_model_with_rfe)(model_name, model, df, features_list, target)
                                  for model_name, model in models.items())

    return results  # Return the list of results

# Helper function to perform RFE and evaluate each model
def evaluate_model_with_rfe(model_name, model, df, features, target):
    X = df[features]
    y = df[target]

    # Perform RFE with the model, skipping for PyTorch (RFE only for sklearn models)
    if model_name != 'PyTorchNN':
        rfe = RFE(estimator=model, n_features_to_select=5)  # Select top 5 features
        X_rfe = rfe.fit_transform(X, y)
    else:
        X_rfe = X.values  # No RFE for PyTorch

    # Split the dataset
    X_train, X_test, y_train, y_test = train_test_split(X_rfe, y, test_size=0.2, random_state=42)

    if model_name == 'PyTorchNN':
        # Use the GPU when available, otherwise fall back to the CPU
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Convert data to PyTorch tensors on the selected device
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device).unsqueeze(1)
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).to(device).unsqueeze(1)

        # Initialize PyTorch model on the same device
        model = NeuralNetworkRegressor(X_train.shape[1]).to(device)

        # Report where training runs
        print(f"Training PyTorch model on {device}.")

        # Loss function and optimizer
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Train the PyTorch model
        for epoch in range(100):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train_tensor)
            loss = criterion(outputs, y_train_tensor)
            loss.backward()
            optimizer.step()

        # Evaluate the PyTorch model
        model.eval()
        with torch.no_grad():
            y_pred = model(X_test_tensor).cpu().numpy()

        r2 = r2_score(y_test, y_pred)
        selected_features = features  # No RFE applied
    else:
        # Train the model for scikit-learn models
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate the R² score
        r2 = r2_score(y_test, y_pred)

        # Get the selected features for scikit-learn models
        selected_features = [features[i] for i in range(len(features)) if rfe.support_[i]]

    # Print the model, selected features, and R² score
    print(f"Model: {model_name}, Selected Features: {selected_features}, R² score: {r2:.4f}")

    return model_name, selected_features, r2  # Return results

# Example usage:
# df = pd.read_csv('your_dataset.csv')
# target_column = 'target_column_name'
# results = StartTraining(target_column, df)
# print(f"\nResults: {results}")
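
Below is a minimal, self-contained usage sketch with a synthetic DataFrame; the feature names and data are invented for illustration, and it assumes the module is importable as model.Regression (the same path main.py uses):

import numpy as np
import pandas as pd
import model.Regression as reg  # import path assumed from main.py

# Build a small synthetic regression dataset (made-up column names)
rng = np.random.default_rng(42)
n = 200
df = pd.DataFrame({f"feat_{c}": rng.normal(size=n) for c in "abcdef"})
df["target"] = 3 * df["feat_a"] - 2 * df["feat_c"] + rng.normal(scale=0.1, size=n)

# Each result is a (model_name, selected_features, r2) tuple
results = reg.StartTraining("target", df)
for model_name, selected_features, r2 in sorted(results, key=lambda r: r[2], reverse=True):
    print(f"{model_name}: R² = {r2:.3f} using {selected_features}")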
Binary file added model/__pycache__/Regression.cpython-312.pyc
52 changes: 40 additions & 12 deletions static/index.css
@@ -117,29 +117,57 @@ button[type="submit"]:hover {
    background-color: #357ABD;
}

#result {
    background-color: #222; /* Dark background */
    color: #fff; /* White text */
    border-radius: 5px; /* Rounded corners */
    padding: 15px; /* Padding */
    margin-top: 20px; /* Spacing above */
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5); /* Subtle shadow */
#best-result {
    background-color: #2c2c2c; /* Slightly lighter dark background for best result */
    color: #fff; /* White text */
    border-radius: 5px; /* Rounded corners */
    padding: 15px; /* Padding */
    margin-top: 20px; /* Spacing above */
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.6); /* More pronounced shadow */
}

#result h3 {
    border-bottom: 2px solid #444; /* Underline for header */
#best-result h3 {
    border-bottom: 2px solid #444; /* Underline for best result header */
    padding-bottom: 10px; /* Space below the header */
}

#result ul {
#best-result ul {
    list-style-type: disc; /* Bullet points for list */
    padding-left: 20px; /* Indentation for list */
    padding-left: 20px; /* Indentation for list */
}

#result p {
#best-result p {
    font-weight: bold; /* Bold text for R² value */
}

#other-results {
    background-color: #1a1a1a; /* Darker background for other results */
    color: #fff; /* White text */
    border-radius: 5px; /* Rounded corners */
    padding: 15px; /* Padding */
    margin-top: 20px; /* Spacing above */
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5); /* Subtle shadow */
}

#other-results h4 {
    border-top: 2px solid #555; /* Underline for other results header */
    padding-bottom: 8px; /* Space below the header */
}

#other-results ul {
    list-style-type: circle; /* Circle bullet points for other models */
    padding-left: 20px; /* Indentation for list */
}

#other-results p {
    font-weight: normal; /* Normal weight for additional info */
}

#regression h3 {
    color: #cdc6c6;
    text-align: center;
    margin-bottom: 5px;
}

/* Style for the columns section */
#columns-section {
