Skip to content

Commit

Permalink
adding Accuracy and RMSE on training for regression model
Browse files Browse the repository at this point in the history
  • Loading branch information
ShubhamSupekar committed Oct 19, 2024
1 parent bd5ed12 commit 6634e30
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 45 deletions.
8 changes: 7 additions & 1 deletion README
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ run this command to activate virtual environment


uvicorn main:app --reload
to run webapp run above command
to run webapp run above command



The accuracy of a regression model is calculated with the following formula:

Accuracy(%) = 100 x [1-(RMSE/Mean of Actual Values)]
Binary file modified __pycache__/main.cpython-312.pyc
Binary file not shown.
26 changes: 14 additions & 12 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ async def remove_columns(request: Request, columns_to_remove: List[str] = Form(.
})


# FastAPI endpoint for training
@app.post("/train")
async def train_model(request: Request, predictor_column: str = Form(...), dataset: str = Form(...)):
global df # Access the global df variable
Expand All @@ -119,31 +120,35 @@ async def train_model(request: Request, predictor_column: str = Form(...), datas
# Train the model using the selected predictor column
results = lr.StartTraining(predictor_column, df)

# Prepare the output for each model in a list format
# Prepare the output for each model in a list format, including accuracy percentage
model_results = [
{
"model": result[0],
"features": result[1],
"r2_score": result[2]
"model": result[0], # Model name
"features": result[1], # Selected features
"r2_score": result[2], # R² score
"rmse": result[3], # RMSE value
"accuracy": result[4] # Accuracy percentage
} for result in results
]

# Sort model results by R² score in decreasing order
model_results.sort(key=lambda x: x['r2_score'], reverse=True)
# Sort model results by both R² score (decreasing) and RMSE (increasing)
model_results.sort(key=lambda x: (-x['r2_score'], x['rmse']))

# Find the best model based on R² score (first in sorted list)
# Find the best model based on both R² score and prediction accuracy
best_model_result = model_results[0] if model_results else None

# Prepare a list of models excluding the best model for display
other_models = model_results[1:] if model_results else []

# Return the template with the best model and all models
# Return the template with the best model, all models, and added accuracy percentage
return templates.TemplateResponse("index.html", {
"request": request,
"predict_column": predictor_column,
"best_model": best_model_result['model'] if best_model_result else None, # Best model name
"best_features": best_model_result['features'] if best_model_result else None, # Best features
"best_r2": best_model_result['r2_score'] if best_model_result else None, # Best R² score
"best_rmse": best_model_result['rmse'] if best_model_result else None, # Best RMSE
"best_accuracy": best_model_result['accuracy'] if best_model_result else None, # Best Accuracy percentage
"other_models": other_models, # All other models
"dataset_used": dataset, # Show which dataset was used in training
"selected_dataset": dataset
Expand All @@ -152,9 +157,6 @@ async def train_model(request: Request, predictor_column: str = Form(...), datas






# Model to return results
class LinearRegressionResult(BaseModel):
slope: float
Expand Down
57 changes: 25 additions & 32 deletions model/Regression.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.feature_selection import mutual_info_regression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.feature_selection import RFE
from joblib import Parallel, delayed

import numpy as np


# Load dataset function
def LoadDataset(df):
    """Print the column labels of *df* and return them.

    Args:
        df: Object exposing a ``columns`` attribute (a pandas DataFrame in
            this project).

    Returns:
        The value of ``df.columns``.
    """
    columns = df.columns
    print("Available columns in the dataset:", columns)
    return columns



# Start training with optimized models and RFE for feature selection
# Start training with optimized models and prediction accuracy
def StartTraining(target_column, df):
# Drop rows with missing values
df = df.dropna()
Expand All @@ -32,18 +29,13 @@ def StartTraining(target_column, df):

print(f"\nInitial Features: {filtered_columns}")

# Perform RFE with multiple models
# Perform multiple models and evaluate with 20% prediction accuracy
results = select_features_with_Mutual_Information(filtered_columns, target_column, df)

return results # Return the results list




def select_features_with_Mutual_Information(features_list, target, df):
results = [] # Store results in a list

# Available models, excluding PyTorch Neural Network
# Available models
models = {
'LinearRegression': LinearRegression(),
'Ridge': Ridge(),
Expand All @@ -61,39 +53,40 @@ def select_features_with_Mutual_Information(features_list, target, df):

return results # Return the list of results



# Helper function to perform feature selection and evaluate each model
# Helper function to perform feature selection, train, and evaluate each model
def evaluate_model_with_feature_importance(model_name, model, df, features, target):
    """Fit one model on an 80/20 split and report its test-set metrics.

    Args:
        model_name: Label for the model. 'RandomForest' and
            'GradientBoosting' select features through the fitted model's
            ``feature_importances_``; every other name uses mutual
            information computed on the training split.
        model: Estimator exposing ``fit`` and ``predict``.
        df: DataFrame containing the feature columns and the target column.
        features: List of feature column names.
        target: Name of the target column.

    Returns:
        Tuple ``(model_name, selected_features, r2, rmse, accuracy)`` where
        ``r2`` and ``rmse`` are computed on the held-out 20% test split and
        ``accuracy`` is ``100 * (1 - rmse / mean(y_test))``.
    """
    X = df[features]
    y = df[target]

    # Split the dataset (80% train, 20% for testing/prediction)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Train the model and predict on the held-out test split
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate the R² score on the test set
    r2 = r2_score(y_test, y_pred)

    if model_name in ['RandomForest', 'GradientBoosting']:
        # Feature importance for tree-based models
        importances = model.feature_importances_
        selected_features = [
            name for name, score in zip(features, importances) if score > 0
        ]
        print(f"Model: {model_name}, Feature Importances: {importances}")
    else:
        # Use Mutual Information for non-tree-based models
        mi = mutual_info_regression(X_train, y_train)
        selected_features = [
            name for name, score in zip(features, mi) if score > 0
        ]
        print(f"Model: {model_name}, Mutual Information Scores: {mi}")

    # root_mean_squared_error already returns the RMSE; taking another
    # square root would report the fourth root of the MSE instead.
    rmse = root_mean_squared_error(y_test, y_pred)

    # Accuracy percentage relative to the mean of the actual values.
    # NOTE(review): this is undefined/misleading when mean(y_test) is zero
    # or negative — confirm the targets used are strictly positive.
    mean_actual = np.mean(y_test)
    accuracy = 100 * (1 - (rmse / mean_actual))

    # Print model, selected features, R² score, RMSE, and accuracy
    print(f"Model: {model_name}, Selected Features: {selected_features}, R² score: {r2:.4f}, RMSE: {rmse:.4f}, Accuracy: {accuracy:.2f}%")

    return model_name, selected_features, r2, rmse, accuracy
Binary file modified model/__pycache__/Regression.cpython-312.pyc
Binary file not shown.
4 changes: 4 additions & 0 deletions templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ <h4>Best Features:</h4>
{% endfor %}
</ul>
<p><strong>Best R² Value:</strong> {{ best_r2 }}</p>
<p><strong>Best Root Mean Squared Error (RMSE):</strong> {{ best_rmse }}</p>
<p><strong>Best Accuracy:</strong> {{ best_accuracy }} %</p>
</div>
{% endif %}

Expand All @@ -129,6 +131,8 @@ <h5>Features:</h5>
{% endfor %}
</ul>
<p><strong>R² Value:</strong> {{ result.r2_score }}</p>
<p><strong> Root Mean Squared Error (RMSE):</strong> {{ result.rmse }}</p>
<p><strong> Accuracy:</strong> {{ result.accuracy }} %</p>
</li>
{% endfor %}
</ul>
Expand Down

0 comments on commit 6634e30

Please sign in to comment.