
Commit

adding all the regression models
ShubhamSupekar committed Oct 18, 2024
1 parent 547c441 commit 688b5ff
Showing 8 changed files with 248 additions and 91 deletions.
Binary file modified __pycache__/main.cpython-312.pyc
34 changes: 26 additions & 8 deletions main.py
@@ -6,7 +6,7 @@
from pydantic import BaseModel
from starlette.requests import Request
import pandas as pd
import model.LinearRegression.LinearRegression as lr
import model.Regression as lr
import numpy as np
from sklearn.linear_model import LinearRegression

Expand Down Expand Up @@ -98,8 +98,6 @@ async def remove_columns(request: Request, columns_to_remove: List[str] = Form(.
    })




@app.post("/train")
async def train_model(request: Request, predictor_column: str = Form(...), dataset: str = Form(...)):
    global df  # Access the global df variable
@@ -119,14 +117,34 @@ async def train_model(request: Request, predictor_column: str = Form(...), datas
    })

    # Train the model using the selected predictor column
    best_features, best_r2 = lr.StartTraining(predictor_column, df)
    results = lr.StartTraining(predictor_column, df)

    # Prepare the output for each model in a list format
    model_results = [
        {
            "model": result[0],
            "features": result[1],
            "r2_score": result[2]
        } for result in results
    ]

    # Sort model results by R² score in decreasing order
    model_results.sort(key=lambda x: x['r2_score'], reverse=True)

    # Find the best model based on R² score (first in sorted list)
    best_model_result = model_results[0] if model_results else None

    # Prepare a list of models excluding the best model for display
    other_models = model_results[1:] if model_results else []

    # Return the template with the dataset, best features, and R² score
    # Return the template with the best model and all models
    return templates.TemplateResponse("index.html", {
        "request": request,
        "predict_column":predictor_column,
        "best_features": best_features,
        "best_r2": best_r2,
        "predict_column": predictor_column,
        "best_model": best_model_result['model'] if best_model_result else None, # Best model name
        "best_features": best_model_result['features'] if best_model_result else None, # Best features
        "best_r2": best_model_result['r2_score'] if best_model_result else None, # Best R² score
        "other_models": other_models, # All other models
        "dataset_used": dataset, # Show which dataset was used in training
        "selected_dataset": dataset
    })
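
For reference, a minimal standalone sketch of the sorting and best/other split this route now performs, assuming lr.StartTraining returns (model_name, selected_features, r2_score) tuples as implemented in model/Regression.py below; the tuple values here are made-up examples, not real training output:

results = [
    ("RandomForest", ["feat_a", "feat_b", "feat_c"], 0.84),
    ("LinearRegression", ["feat_a", "feat_b"], 0.71),
    ("Lasso", ["feat_b"], 0.65),
]

# Same reshaping as in the /train route: tuples -> dicts, sorted by R²
model_results = [
    {"model": name, "features": feats, "r2_score": r2}
    for name, feats, r2 in results
]
model_results.sort(key=lambda x: x["r2_score"], reverse=True)

best_model_result = model_results[0] if model_results else None   # highest R²
other_models = model_results[1:] if model_results else []         # remaining models, already sorted

print(best_model_result["model"])                  # RandomForest
print([m["model"] for m in other_models])          # ['LinearRegression', 'Lasso']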
55 changes: 0 additions & 55 deletions model/LinearRegression/LinearRegression.py

This file was deleted.

143 changes: 143 additions & 0 deletions model/Regression.py
@@ -0,0 +1,143 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.feature_selection import RFE
from joblib import Parallel, delayed
import torch
import torch.nn as nn
import torch.optim as optim

# PyTorch-based Neural Network for regression
class NeuralNetworkRegressor(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetworkRegressor, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Load dataset function
def LoadDataset(df):
print("Available columns in the dataset:", df.columns)
return df.columns

# Start training with optimized models and RFE for feature selection
def StartTraining(target_column, df):
    # Drop rows with missing values
    df = df.dropna()

    # Remove the target column from the list of features
    df_features = df.drop(target_column, axis=1)

    # List of all features
    filtered_columns = df_features.columns.tolist()

    print(f"\nInitial Features: {filtered_columns}")

    # Perform RFE with multiple models
    results = select_features_with_rfe(filtered_columns, target_column, df)

    return results  # Return the results list

# Feature selection using Recursive Feature Elimination (RFE) for different models
def select_features_with_rfe(features_list, target, df):
    results = []  # Store results in a list

    # Available models, including PyTorch Neural Network
    models = {
        'LinearRegression': LinearRegression(),
        'Ridge': Ridge(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'SVR': SVR(kernel='linear'),
        'DecisionTree': DecisionTreeRegressor(),
        'RandomForest': RandomForestRegressor(n_estimators=50, max_depth=5),
        'GradientBoosting': GradientBoostingRegressor(n_estimators=50, max_depth=3),
        'PyTorchNN': NeuralNetworkRegressor
    }

    # Parallelize feature selection and model evaluation
    results = Parallel(n_jobs=-1)(delayed(evaluate_model_with_rfe)(model_name, model, df, features_list, target)
                                  for model_name, model in models.items())

    return results  # Return the list of results

# Helper function to perform RFE and evaluate each model
def evaluate_model_with_rfe(model_name, model, df, features, target):
    X = df[features]
    y = df[target]

    # Perform RFE with the model, skipping for PyTorch (RFE only for sklearn models)
    if model_name != 'PyTorchNN':
        rfe = RFE(estimator=model, n_features_to_select=5)  # Select top 5 features
        X_rfe = rfe.fit_transform(X, y)
    else:
        X_rfe = X.values  # No RFE for PyTorch

    # Split the dataset
    X_train, X_test, y_train, y_test = train_test_split(X_rfe, y, test_size=0.2, random_state=42)

    if model_name == 'PyTorchNN':
        # Use the GPU when available, otherwise fall back to the CPU
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Convert data to PyTorch tensors on the selected device
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device).unsqueeze(1)
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).to(device).unsqueeze(1)

        # Initialize PyTorch model on the same device
        model = NeuralNetworkRegressor(X_train.shape[1]).to(device)

        # Report where training runs
        print(f"Training PyTorch model on {device}.")

        # Loss function and optimizer
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Train the PyTorch model
        for epoch in range(100):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train_tensor)
            loss = criterion(outputs, y_train_tensor)
            loss.backward()
            optimizer.step()

        # Evaluate the PyTorch model
        model.eval()
        with torch.no_grad():
            y_pred = model(X_test_tensor).cpu().numpy()

        r2 = r2_score(y_test, y_pred)
        selected_features = features  # No RFE applied
    else:
        # Train the model for scikit-learn models
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate the R² score
        r2 = r2_score(y_test, y_pred)

        # Get the selected features for scikit-learn models
        selected_features = [features[i] for i in range(len(features)) if rfe.support_[i]]

    # Print the model, selected features, and R² score
    print(f"Model: {model_name}, Selected Features: {selected_features}, R² score: {r2:.4f}")

    return model_name, selected_features, r2  # Return results

# Example usage:
# df = pd.read_csv('your_dataset.csv')
# target_column = 'target_column_name'
# results = StartTraining(target_column, df)
# print(f"\nResults: {results}")
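
Below is a minimal, self-contained usage sketch with a synthetic DataFrame; the feature names and data are invented for illustration, and it assumes the module is importable as model.Regression (the same path main.py uses):

import numpy as np
import pandas as pd
import model.Regression as reg  # import path assumed from main.py

# Build a small synthetic regression dataset (made-up column names)
rng = np.random.default_rng(42)
n = 200
df = pd.DataFrame({f"feat_{c}": rng.normal(size=n) for c in "abcdef"})
df["target"] = 3 * df["feat_a"] - 2 * df["feat_c"] + rng.normal(scale=0.1, size=n)

# Each result is a (model_name, selected_features, r2) tuple
results = reg.StartTraining("target", df)
for model_name, selected_features, r2 in sorted(results, key=lambda r: r[2], reverse=True):
    print(f"{model_name}: R² = {r2:.3f} using {selected_features}")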
Binary file added model/__pycache__/Regression.cpython-312.pyc
52 changes: 40 additions & 12 deletions static/index.css
@@ -117,29 +117,57 @@ button[type="submit"]:hover {
    background-color: #357ABD;
}

#result {
    background-color: #222; /* Dark background */
    color: #fff; /* White text */
    border-radius: 5px; /* Rounded corners */
    padding: 15px; /* Padding */
    margin-top: 20px; /* Spacing above */
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5); /* Subtle shadow */
#best-result {
    background-color: #2c2c2c; /* Slightly lighter dark background for best result */
    color: #fff; /* White text */
    border-radius: 5px; /* Rounded corners */
    padding: 15px; /* Padding */
    margin-top: 20px; /* Spacing above */
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.6); /* More pronounced shadow */
}

#result h3 {
    border-bottom: 2px solid #444; /* Underline for header */
#best-result h3 {
    border-bottom: 2px solid #444; /* Underline for best result header */
    padding-bottom: 10px; /* Space below the header */
}

#result ul {
#best-result ul {
    list-style-type: disc; /* Bullet points for list */
    padding-left: 20px; /* Indentation for list */
    padding-left: 20px; /* Indentation for list */
}

#result p {
#best-result p {
    font-weight: bold; /* Bold text for R² value */
}

#other-results {
    background-color: #1a1a1a; /* Darker background for other results */
    color: #fff; /* White text */
    border-radius: 5px; /* Rounded corners */
    padding: 15px; /* Padding */
    margin-top: 20px; /* Spacing above */
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5); /* Subtle shadow */
}

#other-results h4 {
    border-top: 2px solid #555; /* Underline for other results header */
    padding-bottom: 8px; /* Space below the header */
}

#other-results ul {
    list-style-type: circle; /* Circle bullet points for other models */
    padding-left: 20px; /* Indentation for list */
}

#other-results p {
    font-weight: normal; /* Normal weight for additional info */
}

#regression h3 {
    color: #cdc6c6;
    text-align: center;
    margin-bottom: 5px;
}

/* Style for the columns section */
#columns-section {
