diff --git a/__pycache__/main.cpython-312.pyc b/__pycache__/main.cpython-312.pyc index 67fd321..77053c2 100644 Binary files a/__pycache__/main.cpython-312.pyc and b/__pycache__/main.cpython-312.pyc differ diff --git a/main.py b/main.py index f37d285..ae56a0f 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,7 @@ from pydantic import BaseModel from starlette.requests import Request import pandas as pd -import model.LinearRegression.LinearRegression as lr +import model.Regression as lr import numpy as np from sklearn.linear_model import LinearRegression @@ -98,8 +98,6 @@ async def remove_columns(request: Request, columns_to_remove: List[str] = Form(. }) - - @app.post("/train") async def train_model(request: Request, predictor_column: str = Form(...), dataset: str = Form(...)): global df # Access the global df variable @@ -119,14 +117,34 @@ async def train_model(request: Request, predictor_column: str = Form(...), datas }) # Train the model using the selected predictor column - best_features, best_r2 = lr.StartTraining(predictor_column, df) + results = lr.StartTraining(predictor_column, df) + + # Prepare the output for each model in a list format + model_results = [ + { + "model": result[0], + "features": result[1], + "r2_score": result[2] + } for result in results + ] + + # Sort model results by R² score in decreasing order + model_results.sort(key=lambda x: x['r2_score'], reverse=True) + + # Find the best model based on R² score (first in sorted list) + best_model_result = model_results[0] if model_results else None + + # Prepare a list of models excluding the best model for display + other_models = model_results[1:] if model_results else [] - # Return the template with the dataset, best features, and R² score + # Return the template with the best model and all models return templates.TemplateResponse("index.html", { "request": request, - "predict_column":predictor_column, - "best_features": best_features, - "best_r2": best_r2, + "predict_column": predictor_column, + 
"best_model": best_model_result['model'] if best_model_result else None, # Best model name + "best_features": best_model_result['features'] if best_model_result else None, # Best features + "best_r2": best_model_result['r2_score'] if best_model_result else None, # Best R² score + "other_models": other_models, # All other models "dataset_used": dataset, # Show which dataset was used in training "selected_dataset": dataset }) diff --git a/model/LinearRegression/LinearRegression.py b/model/LinearRegression/LinearRegression.py deleted file mode 100644 index f570e29..0000000 --- a/model/LinearRegression/LinearRegression.py +++ /dev/null @@ -1,55 +0,0 @@ -import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.linear_model import LinearRegression -from sklearn.metrics import r2_score - -def LoadDataset(df): - print("Available columns in the dataset:", df.columns) - return df.columns - -def StartTraining(target_column, df): - - # Drop rows with missing values - df = df.dropna() - - # Remove the target column from the list of features - df_features = df.drop(target_column, axis=1) - - # List of all features - filtered_columns = df_features.columns.tolist() - - print(f"\nInitial Features: {filtered_columns}") - - # Perform forward selection - best_features, best_r2 = forward_selection(filtered_columns.copy(), target_column, df) - return best_features, best_r2 - - -# Stepwise forward selection function -def forward_selection(features_list, target, df): - selected_features = [] - best_r2 = -float('inf') - current_best_r2 = 0 - while len(features_list) > 0: - temp_r2_scores = [] - for feature in features_list: - combo = selected_features + [feature] - X_train, X_test, y_train, y_test = train_test_split(df[combo], df[target], test_size=0.2, random_state=42) - model = LinearRegression() - model.fit(X_train, y_train) - y_pred = model.predict(X_test) - r2 = r2_score(y_test, y_pred) - temp_r2_scores.append((combo, r2)) - - # Find the best new feature 
to add - best_combo, best_r2 = max(temp_r2_scores, key=lambda x: x[1]) - - if best_r2 > current_best_r2: - current_best_r2 = best_r2 - selected_features = best_combo - features_list.remove(best_combo[-1]) # Remove the best feature from available features - else: - break # If no improvement, stop - - return selected_features, current_best_r2 - diff --git a/model/LinearRegression/__pycache__/LinearRegression.cpython-312.pyc b/model/LinearRegression/__pycache__/LinearRegression.cpython-312.pyc deleted file mode 100644 index 7f6ab61..0000000 Binary files a/model/LinearRegression/__pycache__/LinearRegression.cpython-312.pyc and /dev/null differ diff --git a/model/Regression.py b/model/Regression.py new file mode 100644 index 0000000..d698ac7 --- /dev/null +++ b/model/Regression.py @@ -0,0 +1,143 @@ +import pandas as pd +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet +from sklearn.preprocessing import PolynomialFeatures +from sklearn.svm import SVR +from sklearn.tree import DecisionTreeRegressor +from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor +from sklearn.metrics import r2_score +from sklearn.feature_selection import RFE +from joblib import Parallel, delayed +import torch +import torch.nn as nn +import torch.optim as optim + +# PyTorch-based Neural Network for regression +class NeuralNetworkRegressor(nn.Module): + def __init__(self, input_size): + super(NeuralNetworkRegressor, self).__init__() + self.fc1 = nn.Linear(input_size, 128) + self.fc2 = nn.Linear(128, 64) + self.fc3 = nn.Linear(64, 1) + + def forward(self, x): + x = torch.relu(self.fc1(x)) + x = torch.relu(self.fc2(x)) + x = self.fc3(x) + return x + +# Load dataset function +def LoadDataset(df): + print("Available columns in the dataset:", df.columns) + return df.columns + +# Start training with optimized models and RFE for feature selection +def StartTraining(target_column, df): + # Drop rows with 
missing values + df = df.dropna() + + # Remove the target column from the list of features + df_features = df.drop(target_column, axis=1) + + # List of all features + filtered_columns = df_features.columns.tolist() + + print(f"\nInitial Features: {filtered_columns}") + + # Perform RFE with multiple models + results = select_features_with_rfe(filtered_columns, target_column, df) + + return results # Return the results list + +# Feature selection using Recursive Feature Elimination (RFE) for different models +def select_features_with_rfe(features_list, target, df): + results = [] # Store results in a list + + # Available models, including PyTorch Neural Network + models = { + 'LinearRegression': LinearRegression(), + 'Ridge': Ridge(), + 'Lasso': Lasso(), + 'ElasticNet': ElasticNet(), + 'SVR': SVR(kernel='linear'), + 'DecisionTree': DecisionTreeRegressor(), + 'RandomForest': RandomForestRegressor(n_estimators=50, max_depth=5), + 'GradientBoosting': GradientBoostingRegressor(n_estimators=50, max_depth=3), + 'PyTorchNN': NeuralNetworkRegressor + } + + # Parallelize feature selection and model evaluation + results = Parallel(n_jobs=-1)(delayed(evaluate_model_with_rfe)(model_name, model, df, features_list, target) + for model_name, model in models.items()) + + return results # Return the list of results + +# Helper function to perform RFE and evaluate each model +def evaluate_model_with_rfe(model_name, model, df, features, target): + X = df[features] + y = df[target] + + # Perform RFE with the model, skipping for PyTorch (RFE only for sklearn models) + if model_name != 'PyTorchNN': + rfe = RFE(estimator=model, n_features_to_select=5) # Select top 5 features + X_rfe = rfe.fit_transform(X, y) + else: + X_rfe = X.values # No RFE for PyTorch + + # Split the dataset + X_train, X_test, y_train, y_test = train_test_split(X_rfe, y, test_size=0.2, random_state=42) + + if model_name == 'PyTorchNN': + # Convert data to PyTorch tensors and use GPU + X_train_tensor = 
torch.tensor(X_train, dtype=torch.float32).cuda() + y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).cuda().unsqueeze(1) + X_test_tensor = torch.tensor(X_test, dtype=torch.float32).cuda() + y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).cuda().unsqueeze(1) + + # Initialize PyTorch model + model = NeuralNetworkRegressor(X_train.shape[1]).cuda() + + # Print GPU usage + print("Training PyTorch model on GPU." if torch.cuda.is_available() else "Training PyTorch model on CPU.") + + # Loss function and optimizer + criterion = nn.MSELoss() + optimizer = optim.Adam(model.parameters(), lr=0.001) + + # Train the PyTorch model + for epoch in range(100): + model.train() + optimizer.zero_grad() + outputs = model(X_train_tensor) + loss = criterion(outputs, y_train_tensor) + loss.backward() + optimizer.step() + + # Evaluate the PyTorch model + model.eval() + with torch.no_grad(): + y_pred = model(X_test_tensor).cpu().numpy() + + r2 = r2_score(y_test, y_pred) + selected_features = features # No RFE applied + else: + # Train the model for scikit-learn models + model.fit(X_train, y_train) + y_pred = model.predict(X_test) + + # Calculate the R² score + r2 = r2_score(y_test, y_pred) + + # Get the selected features for scikit-learn models + selected_features = [features[i] for i in range(len(features)) if rfe.support_[i]] + + # Print the model, selected features, and R² score + print(f"Model: {model_name}, Selected Features: {selected_features}, R² score: {r2:.4f}") + + return model_name, selected_features, r2 # Return results + +# Example usage: +# df = pd.read_csv('your_dataset.csv') +# target_column = 'target_column_name' +# results = StartTraining(target_column, df) +# print(f"\nResults: {results}") \ No newline at end of file diff --git a/model/__pycache__/Regression.cpython-312.pyc b/model/__pycache__/Regression.cpython-312.pyc new file mode 100644 index 0000000..d4f1625 Binary files /dev/null and b/model/__pycache__/Regression.cpython-312.pyc 
differ diff --git a/static/index.css b/static/index.css index 588167e..47b8248 100644 --- a/static/index.css +++ b/static/index.css @@ -117,29 +117,57 @@ button[type="submit"]:hover { background-color: #357ABD; } -#result { - background-color: #222; /* Dark background */ - color: #fff; /* White text */ - border-radius: 5px; /* Rounded corners */ - padding: 15px; /* Padding */ - margin-top: 20px; /* Spacing above */ - box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5); /* Subtle shadow */ +#best-result { + background-color: #2c2c2c; /* Slightly lighter dark background for best result */ + color: #fff; /* White text */ + border-radius: 5px; /* Rounded corners */ + padding: 15px; /* Padding */ + margin-top: 20px; /* Spacing above */ + box-shadow: 0 4px 15px rgba(0, 0, 0, 0.6); /* More pronounced shadow */ } -#result h3 { - border-bottom: 2px solid #444; /* Underline for header */ +#best-result h3 { + border-bottom: 2px solid #444; /* Underline for best result header */ padding-bottom: 10px; /* Space below the header */ } -#result ul { +#best-result ul { list-style-type: disc; /* Bullet points for list */ - padding-left: 20px; /* Indentation for list */ + padding-left: 20px; /* Indentation for list */ } -#result p { +#best-result p { font-weight: bold; /* Bold text for R² value */ } +#other-results { + background-color: #1a1a1a; /* Darker background for other results */ + color: #fff; /* White text */ + border-radius: 5px; /* Rounded corners */ + padding: 15px; /* Padding */ + margin-top: 20px; /* Spacing above */ + box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5); /* Subtle shadow */ +} + +#other-results h4 { + border-top: 2px solid #555; /* Top border for other results header */ + padding-bottom: 8px; /* Space below the header */ +} + +#other-results ul { + list-style-type: circle; /* Circle bullet points for other models */ + padding-left: 20px; /* Indentation for list */ +} + +#other-results p { + font-weight: normal; /* Normal weight for additional info */ +} + +#regression h3{ + color: 
#cdc6c6; + text-align: center; + margin-bottom: 5px; +} /* Style for the columns section */ #columns-section { diff --git a/templates/index.html b/templates/index.html index 15c7057..c6ce4e4 100644 --- a/templates/index.html +++ b/templates/index.html @@ -17,7 +17,7 @@

AlgoPlay

@@ -28,8 +28,9 @@

AlgoPlay

-
-

Linear Regression

+
+

Regression

+

Train all the Regression models on the selected dataset

Select a dataset:

@@ -95,23 +96,45 @@

Select a Column for Prediction:

{% endif %} - - {% if best_features %} -
-

To predict {{predict_column}} these are the Best Features:

-
    - {% for feature in best_features %} -
  • {{ feature }}
  • - {% endfor %} -
-

Best R² Value: {{ best_r2 }}

- + + {% if best_model %} {% if dataset_used %}

Dataset Used: {{ dataset_used }}

{% endif %} -
- {% endif %} +
+

The best model for predicting {{ predict_column }} is: {{ best_model }}

+

Best Features:

+
    + {% for feature in best_features %} +
  • {{ feature }}
  • + {% endfor %} +
+

Best R² Value: {{ best_r2 }}

+
+ {% endif %} + + + {% if other_models %} +
+

Other Models Results:

+
    + {% for result in other_models %} +
  • +

    Model: {{ result.model }}

    +
    Features:
    +
      + {% for feature in result.features %} +
    • {{ feature }}
    • + {% endfor %} +
    +

    R² Value: {{ result.r2_score }}

    +
  • + {% endfor %} +
+
+ {% endif %} +