diff --git a/__pycache__/main.cpython-312.pyc b/__pycache__/main.cpython-312.pyc
index 67fd321..77053c2 100644
Binary files a/__pycache__/main.cpython-312.pyc and b/__pycache__/main.cpython-312.pyc differ
diff --git a/main.py b/main.py
index f37d285..ae56a0f 100644
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@
from pydantic import BaseModel
from starlette.requests import Request
import pandas as pd
-import model.LinearRegression.LinearRegression as lr
+import model.Regression as lr
import numpy as np
from sklearn.linear_model import LinearRegression
@@ -98,8 +98,6 @@ async def remove_columns(request: Request, columns_to_remove: List[str] = Form(.
})
-
-
@app.post("/train")
async def train_model(request: Request, predictor_column: str = Form(...), dataset: str = Form(...)):
global df # Access the global df variable
@@ -119,14 +117,34 @@ async def train_model(request: Request, predictor_column: str = Form(...), datas
})
# Train the model using the selected predictor column
- best_features, best_r2 = lr.StartTraining(predictor_column, df)
+ results = lr.StartTraining(predictor_column, df)
+
+ # Prepare the output for each model in a list format
+ model_results = [
+ {
+ "model": result[0],
+ "features": result[1],
+ "r2_score": result[2]
+ } for result in results
+ ]
+
+ # Sort model results by R² score in decreasing order
+ model_results.sort(key=lambda x: x['r2_score'], reverse=True)
+
+ # Find the best model based on R² score (first in sorted list)
+ best_model_result = model_results[0] if model_results else None
+
+ # Prepare a list of models excluding the best model for display
+ other_models = model_results[1:] if model_results else []
- # Return the template with the dataset, best features, and R² score
+ # Return the template with the best model and all models
return templates.TemplateResponse("index.html", {
"request": request,
- "predict_column":predictor_column,
- "best_features": best_features,
- "best_r2": best_r2,
+ "predict_column": predictor_column,
+ "best_model": best_model_result['model'] if best_model_result else None, # Best model name
+ "best_features": best_model_result['features'] if best_model_result else None, # Best features
+ "best_r2": best_model_result['r2_score'] if best_model_result else None, # Best R² score
+ "other_models": other_models, # All other models
"dataset_used": dataset, # Show which dataset was used in training
"selected_dataset": dataset
})
diff --git a/model/LinearRegression/LinearRegression.py b/model/LinearRegression/LinearRegression.py
deleted file mode 100644
index f570e29..0000000
--- a/model/LinearRegression/LinearRegression.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LinearRegression
-from sklearn.metrics import r2_score
-
-def LoadDataset(df):
- print("Available columns in the dataset:", df.columns)
- return df.columns
-
-def StartTraining(target_column, df):
-
- # Drop rows with missing values
- df = df.dropna()
-
- # Remove the target column from the list of features
- df_features = df.drop(target_column, axis=1)
-
- # List of all features
- filtered_columns = df_features.columns.tolist()
-
- print(f"\nInitial Features: {filtered_columns}")
-
- # Perform forward selection
- best_features, best_r2 = forward_selection(filtered_columns.copy(), target_column, df)
- return best_features, best_r2
-
-
-# Stepwise forward selection function
-def forward_selection(features_list, target, df):
- selected_features = []
- best_r2 = -float('inf')
- current_best_r2 = 0
- while len(features_list) > 0:
- temp_r2_scores = []
- for feature in features_list:
- combo = selected_features + [feature]
- X_train, X_test, y_train, y_test = train_test_split(df[combo], df[target], test_size=0.2, random_state=42)
- model = LinearRegression()
- model.fit(X_train, y_train)
- y_pred = model.predict(X_test)
- r2 = r2_score(y_test, y_pred)
- temp_r2_scores.append((combo, r2))
-
- # Find the best new feature to add
- best_combo, best_r2 = max(temp_r2_scores, key=lambda x: x[1])
-
- if best_r2 > current_best_r2:
- current_best_r2 = best_r2
- selected_features = best_combo
- features_list.remove(best_combo[-1]) # Remove the best feature from available features
- else:
- break # If no improvement, stop
-
- return selected_features, current_best_r2
-
diff --git a/model/LinearRegression/__pycache__/LinearRegression.cpython-312.pyc b/model/LinearRegression/__pycache__/LinearRegression.cpython-312.pyc
deleted file mode 100644
index 7f6ab61..0000000
Binary files a/model/LinearRegression/__pycache__/LinearRegression.cpython-312.pyc and /dev/null differ
diff --git a/model/Regression.py b/model/Regression.py
new file mode 100644
index 0000000..d698ac7
--- /dev/null
+++ b/model/Regression.py
@@ -0,0 +1,143 @@
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
+from sklearn.preprocessing import PolynomialFeatures
+from sklearn.svm import SVR
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
+from sklearn.metrics import r2_score
+from sklearn.feature_selection import RFE
+from joblib import Parallel, delayed
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+# PyTorch-based Neural Network for regression
+class NeuralNetworkRegressor(nn.Module):
+    """Fully connected regression network: input_size -> 128 -> 64 -> 1."""
+    def __init__(self, input_size):
+        super().__init__()
+        self.fc1 = nn.Linear(input_size, 128)
+        self.fc2 = nn.Linear(128, 64)
+        self.fc3 = nn.Linear(64, 1)
+
+    def forward(self, x):
+        """Apply ReLU after each hidden layer; the output layer stays linear."""
+        hidden = torch.relu(self.fc2(torch.relu(self.fc1(x))))
+        return self.fc3(hidden)
+
+# Print the column labels of df (debug aid) and return that same columns object
+def LoadDataset(df):
+    print("Available columns in the dataset:", df.columns)
+    return df.columns
+
+# Start training with optimized models and RFE for feature selection
+def StartTraining(target_column, df):
+    """Train every supported regressor to predict ``target_column``.
+
+    Rows of ``df`` with missing values are dropped, and every remaining
+    column except the target is offered as a candidate feature.
+
+    Returns the list produced by ``select_features_with_rfe`` for the
+    cleaned data.
+    """
+    complete_rows = df.dropna()
+
+    # Candidate features: all columns other than the target
+    candidate_features = complete_rows.drop(columns=target_column).columns.tolist()
+    print(f"\nInitial Features: {candidate_features}")
+
+    return select_features_with_rfe(candidate_features, target_column, complete_rows)
+
+# Feature selection using Recursive Feature Elimination (RFE) for different models
+def select_features_with_rfe(features_list, target, df):
+    """Score every candidate regressor on ``df`` using parallel joblib tasks.
+
+    Returns a list holding one ``(model_name, selected_features, r2)`` tuple
+    per model, as produced by ``evaluate_model_with_rfe``.
+    """
+    candidates = {
+        'LinearRegression': LinearRegression(),
+        'Ridge': Ridge(),
+        'Lasso': Lasso(),
+        'ElasticNet': ElasticNet(),
+        'SVR': SVR(kernel='linear'),
+        'DecisionTree': DecisionTreeRegressor(),
+        'RandomForest': RandomForestRegressor(n_estimators=50, max_depth=5),
+        'GradientBoosting': GradientBoostingRegressor(n_estimators=50, max_depth=3),
+        'PyTorchNN': NeuralNetworkRegressor,  # the class itself; instantiated by the evaluator
+    }
+    # n_jobs=-1 asks joblib to use all available CPU cores
+    return Parallel(n_jobs=-1)(
+        delayed(evaluate_model_with_rfe)(name, estimator, df, features_list, target)
+        for name, estimator in candidates.items())
+
+# Helper function to perform RFE and evaluate each model
+def evaluate_model_with_rfe(model_name, model, df, features, target):
+    """Fit one candidate model on an 80/20 split of ``df`` and score it.
+
+    scikit-learn estimators are first reduced with RFE (asked to keep five
+    features); the PyTorch network skips RFE and trains on every feature.
+    RFE is fitted on the training rows only, so the held-out rows never
+    influence which features are kept.  The network trains on a CUDA device
+    when one is available and on the CPU otherwise.
+
+    Returns ``(model_name, selected_features, r2)``, with ``r2`` measured on
+    the held-out 20% of rows.
+    """
+    # Split before any fitting; the fixed seed gives every model the same rows
+    X_train, X_test, y_train, y_test = train_test_split(
+        df[features], df[target], test_size=0.2, random_state=42)
+
+    if model_name == 'PyTorchNN':
+        # Use the GPU only when one is actually present instead of assuming CUDA
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print("Training PyTorch model on GPU." if device.type == "cuda" else "Training PyTorch model on CPU.")
+
+        X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32, device=device)
+        y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32, device=device).unsqueeze(1)
+        X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32, device=device)
+
+        # ``model`` arrives as the class; build it now that the input width is known
+        model = NeuralNetworkRegressor(X_train.shape[1]).to(device)
+        criterion = nn.MSELoss()
+        optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+        # Train with 100 full-batch Adam steps
+        model.train()
+        for _ in range(100):
+            optimizer.zero_grad()
+            loss = criterion(model(X_train_tensor), y_train_tensor)
+            loss.backward()
+            optimizer.step()
+
+        # Predict on the held-out rows without tracking gradients
+        model.eval()
+        with torch.no_grad():
+            y_pred = model(X_test_tensor).cpu().numpy()
+
+        selected_features = features  # No RFE applied
+    else:
+        # Choose features from the training rows only to avoid test-set leakage
+        rfe = RFE(estimator=model, n_features_to_select=5)
+        X_train_selected = rfe.fit_transform(X_train, y_train)
+        X_test_selected = rfe.transform(X_test)
+
+        # Refit the estimator on the reduced training matrix, then predict
+        model.fit(X_train_selected, y_train)
+        y_pred = model.predict(X_test_selected)
+
+        # support_ is a boolean mask aligned with ``features``
+        selected_features = [name for name, keep in zip(features, rfe.support_) if keep]
+
+    r2 = r2_score(y_test, y_pred)
+
+    print(f"Model: {model_name}, Selected Features: {selected_features}, R² score: {r2:.4f}")
+
+    return model_name, selected_features, r2  # Return results
+
+# Example usage:
+# df = pd.read_csv('your_dataset.csv')
+# target_column = 'target_column_name'
+# results = StartTraining(target_column, df)
+# print(f"\nResults: {results}")
\ No newline at end of file
diff --git a/model/__pycache__/Regression.cpython-312.pyc b/model/__pycache__/Regression.cpython-312.pyc
new file mode 100644
index 0000000..d4f1625
Binary files /dev/null and b/model/__pycache__/Regression.cpython-312.pyc differ
diff --git a/static/index.css b/static/index.css
index 588167e..47b8248 100644
--- a/static/index.css
+++ b/static/index.css
@@ -117,29 +117,57 @@ button[type="submit"]:hover {
background-color: #357ABD;
}
-#result {
- background-color: #222; /* Dark background */
- color: #fff; /* White text */
- border-radius: 5px; /* Rounded corners */
- padding: 15px; /* Padding */
- margin-top: 20px; /* Spacing above */
- box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5); /* Subtle shadow */
+#best-result {
+ background-color: #2c2c2c; /* Slightly lighter dark background for best result */
+ color: #fff; /* White text */
+ border-radius: 5px; /* Rounded corners */
+ padding: 15px; /* Padding */
+ margin-top: 20px; /* Spacing above */
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.6); /* More pronounced shadow */
}
-#result h3 {
- border-bottom: 2px solid #444; /* Underline for header */
+#best-result h3 {
+ border-bottom: 2px solid #444; /* Underline for best result header */
padding-bottom: 10px; /* Space below the header */
}
-#result ul {
+#best-result ul {
list-style-type: disc; /* Bullet points for list */
- padding-left: 20px; /* Indentation for list */
+ padding-left: 20px; /* Indentation for list */
}
-#result p {
+#best-result p {
font-weight: bold; /* Bold text for R² value */
}
+#other-results {
+ background-color: #1a1a1a; /* Darker background for other results */
+ color: #fff; /* White text */
+ border-radius: 5px; /* Rounded corners */
+ padding: 15px; /* Padding */
+ margin-top: 20px; /* Spacing above */
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5); /* Subtle shadow */
+}
+
+#other-results h4 {
+    border-top: 2px solid #555; /* Separator line above the other results header */
+    padding-bottom: 8px; /* Space below the header */
+}
+
+#other-results ul {
+ list-style-type: circle; /* Circle bullet points for other models */
+ padding-left: 20px; /* Indentation for list */
+}
+
+#other-results p {
+ font-weight: normal; /* Normal weight for additional info */
+}
+
+#regression h3{
+ color: #cdc6c6;
+ text-align: center;
+ margin-bottom: 5px;
+}
/* Style for the columns section */
#columns-section {
diff --git a/templates/index.html b/templates/index.html
index 15c7057..c6ce4e4 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -17,7 +17,7 @@
AlgoPlay
Models
@@ -28,8 +28,9 @@ AlgoPlay
-
- Linear Regression
+
+ Regression
+ Train all the Regression models on selected dataset
{% endif %}
-
- {% if best_features %}
-
-
To predict {{predict_column}} these are the Best Features:
-
- {% for feature in best_features %}
- - {{ feature }}
- {% endfor %}
-
-
Best R² Value: {{ best_r2 }}
-
+
+ {% if best_model %}
{% if dataset_used %}
Dataset Used: {{ dataset_used }}
{% endif %}
-
- {% endif %}
+
+
The best model for predicting {{ predict_column }} is: {{ best_model }}
+
Best Features:
+
+ {% for feature in best_features %}
+ - {{ feature }}
+ {% endfor %}
+
+
Best R² Value: {{ best_r2 }}
+
+ {% endif %}
+
+
+ {% if other_models %}
+
+
Other Models Results:
+
+
+ {% endif %}
+