diff --git a/Hedging with Real Estate/New_Upgrade_Bengaluru_House_Price_Predition/GradientBoostingRegressor.pkl b/Hedging with Real Estate/New_Upgrade_Bengaluru_House_Price_Predition/GradientBoostingRegressor.pkl
new file mode 100644
index 00000000..6ce6ae82
Binary files /dev/null and b/Hedging with Real Estate/New_Upgrade_Bengaluru_House_Price_Predition/GradientBoostingRegressor.pkl differ
diff --git a/Hedging with Real Estate/New_Upgrade_Bengaluru_House_Price_Predition/MoreModels.ipynb b/Hedging with Real Estate/New_Upgrade_Bengaluru_House_Price_Predition/MoreModels.ipynb
new file mode 100644
index 00000000..29964f9f
--- /dev/null
+++ b/Hedging with Real Estate/New_Upgrade_Bengaluru_House_Price_Predition/MoreModels.ipynb
@@ -0,0 +1,8630 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.pipeline import Pipeline\n",
+    "import pickle\n",
+    "import numpy as np\n",
+    "\n",
+    "# Extract the data-preprocessing steps from the existing fitted pipeline:\n",
+    "# every step except the final model\n",
+    "with open('PIPELINE.pkl', 'rb') as file:\n",
+    "    pipeline = pickle.load(file)\n",
+    "with open('dataframe.pkl', 'rb') as file:\n",
+    "    data = pickle.load(file)\n",
+    "\n",
+    "data_pipeline = Pipeline(pipeline.steps[:-1])\n"
+   ]
+  },
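Pipeline.steps is a plain list of (name, estimator) tuples, so pipeline.steps[:-1] keeps the already-fitted transformers and drops only the final model. A minimal sketch of how the extracted data_pipeline could then be reused; the target column name 'price' is an assumption, since it is not visible in this diff:

# Sketch only. Assumes PIPELINE.pkl held a fitted pipeline and that the
# dataframe's target column is named 'price' (hypothetical).
X = data.drop(columns=['price'])
X_processed = data_pipeline.transform(X)  # transform only; the steps were fitted upstream
print(X_processed.shape)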
[Cell 15 output — the fitted preprocessing pipeline:
 Pipeline(steps=[('step1',
                  ColumnTransformer(remainder='passthrough',
                                    transformers=[('col_tnf',
                                                   OneHotEncoder(drop='first',
                                                                 sparse_output=False),
                                                   [0, 2])])),
                 ('step2', StandardScaler())])
 Columns [0, 2] are one-hot encoded; ['availability', 'total_sqft', 'bath', 'bhk']
 pass through unchanged before standard scaling.]
[Later cell output — 5-fold grid search over the gradient-boosting pipeline:
 GridSearchCV(cv=5, n_jobs=-1, scoring='neg_mean_absolute_error', verbose=3,
              estimator=Pipeline(steps=[('Data_processing', <preprocessing pipeline above>),
                                        ('model1', GradientBoostingRegressor())]),
              param_grid={'model1__criterion': ['friedman_mse'],
                          'model1__learning_rate': [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
                          'model1__max_depth': [3, 5, 8],
                          'model1__max_features': ['log2', 'sqrt'],
                          'model1__n_estimators': [50, 100, 150, 200],
                          'model1__subsample': [0.5, 0.618, 0.8, 0.85, 0.9, 0.95, 1.0]})]
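The cell that constructed this search is not visible in the excerpt; a sketch that reproduces the configuration shown in the output above (data_pipeline, X, and y are assumed to come from earlier cells):

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Preprocessing + model, searched end to end with 5-fold CV on mean absolute error.
gbr_pipe = Pipeline(steps=[('Data_processing', data_pipeline),
                           ('model1', GradientBoostingRegressor())])
gbr_grid = {
    'model1__criterion': ['friedman_mse'],
    'model1__learning_rate': [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
    'model1__max_depth': [3, 5, 8],
    'model1__max_features': ['log2', 'sqrt'],
    'model1__n_estimators': [50, 100, 150, 200],
    'model1__subsample': [0.5, 0.618, 0.8, 0.85, 0.9, 0.95, 1.0],
}
gbr_search = GridSearchCV(gbr_pipe, gbr_grid, cv=5, n_jobs=-1,
                          scoring='neg_mean_absolute_error', verbose=3)
# gbr_search.fit(X, y)  # X, y assumed to be defined in earlier cells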
[Later cell output — 5-fold grid search over the AdaBoost pipeline:
 GridSearchCV(cv=5, n_jobs=-1, scoring='neg_mean_absolute_error', verbose=3,
              estimator=Pipeline(steps=[('Data_processing', <preprocessing pipeline above>),
                                        ('model2', AdaBoostRegressor())]),
              param_grid={'model2__learning_rate': [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
                          'model2__loss': ['linear', 'square', 'exponential'],
                          'model2__n_estimators': [50, 100, 150, 200]})]
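The same pattern applied to AdaBoost; a sketch matching the output above, under the same assumptions about data_pipeline, X, and y:

from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

ada_pipe = Pipeline(steps=[('Data_processing', data_pipeline),
                           ('model2', AdaBoostRegressor())])
ada_grid = {
    'model2__learning_rate': [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
    'model2__loss': ['linear', 'square', 'exponential'],
    'model2__n_estimators': [50, 100, 150, 200],
}
ada_search = GridSearchCV(ada_pipe, ada_grid, cv=5, n_jobs=-1,
                          scoring='neg_mean_absolute_error', verbose=3)
# ada_search.fit(X, y)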
[Later cell output — the final stacked pipeline:
 Pipeline(steps=[('Data_pipeline', <preprocessing pipeline above>),
                 ('stacking_model',
                  StackingRegressor(estimators=[('xgbr', XGBRegressor()),   # version defaults
                                                ('gbr', GradientBoostingRegressor(
                                                            learning_rate=0.15, max_depth=8,
                                                            max_features='sqrt',
                                                            n_estimators=200,
                                                            subsample=0.85))],
                                    final_estimator=LinearSVR()))])]
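The stack feeds an untuned XGBRegressor and a GradientBoostingRegressor, whose hyperparameters match values from the search grid above (presumably the best combination found), into a LinearSVR meta-learner. A sketch of the construction; whether the GradientBoostingRegressor.pkl added in this diff stores this stacked pipeline or only the tuned GBR is not visible here, so the dump at the end is an assumption:

import pickle
from sklearn.ensemble import GradientBoostingRegressor, StackingRegressor
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVR
from xgboost import XGBRegressor

# Base learners: XGBoost with library defaults, plus the tuned GBR from the search above.
stack = StackingRegressor(
    estimators=[('xgbr', XGBRegressor()),
                ('gbr', GradientBoostingRegressor(learning_rate=0.15, max_depth=8,
                                                  max_features='sqrt', n_estimators=200,
                                                  subsample=0.85))],
    final_estimator=LinearSVR())

final_pipe = Pipeline(steps=[('Data_pipeline', data_pipeline),
                             ('stacking_model', stack)])
# final_pipe.fit(X, y)  # X, y assumed from earlier cells
# Assumption: the pickle added in this diff may have been written along these lines:
# with open('GradientBoostingRegressor.pkl', 'wb') as f:
#     pickle.dump(final_pipe, f)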