diff --git a/ARIMA/.ipynb_checkpoints/hybrid-checkpoint.ipynb b/ARIMA/.ipynb_checkpoints/hybrid-checkpoint.ipynb new file mode 100644 index 0000000..e5870cb --- /dev/null +++ b/ARIMA/.ipynb_checkpoints/hybrid-checkpoint.ipynb @@ -0,0 +1,817 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Performing stepwise search to minimize aic\n", + " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30240.523, Time=2.28 sec\n", + " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30272.252, Time=0.19 sec\n", + " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30243.723, Time=0.54 sec\n", + " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30241.768, Time=0.77 sec\n", + " ARIMA(0,1,0)(0,0,0)[0] : AIC=30270.982, Time=0.17 sec\n", + " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30234.621, Time=2.66 sec\n", + " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30239.276, Time=0.76 sec\n", + " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30236.607, Time=4.12 sec\n", + " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30236.577, Time=6.50 sec\n", + " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30234.958, Time=3.86 sec\n", + " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30240.618, Time=1.05 sec\n", + " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30237.920, Time=5.87 sec\n", + " ARIMA(2,1,1)(0,0,0)[0] : AIC=30233.414, Time=0.94 sec\n", + " ARIMA(1,1,1)(0,0,0)[0] : AIC=30239.179, Time=1.16 sec\n", + " ARIMA(2,1,0)(0,0,0)[0] : AIC=30237.952, Time=0.39 sec\n", + " ARIMA(3,1,1)(0,0,0)[0] : AIC=30235.401, Time=1.76 sec\n", + " ARIMA(2,1,2)(0,0,0)[0] : AIC=30235.372, Time=2.71 sec\n", + " ARIMA(1,1,0)(0,0,0)[0] : AIC=30242.355, Time=0.28 sec\n", + " ARIMA(1,1,2)(0,0,0)[0] : AIC=30233.748, Time=1.51 sec\n", + " ARIMA(3,1,0)(0,0,0)[0] : AIC=30239.309, Time=0.48 sec\n", + " ARIMA(3,1,2)(0,0,0)[0] : AIC=30236.712, Time=2.50 sec\n", + "\n", + "Best model: ARIMA(2,1,1)(0,0,0)[0] \n", + "Total fit time: 40.526 seconds\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 601ms/step\n", + "Hybrid model prediction for next day closing price: $244.92\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from pmdarima import auto_arima\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", + "\n", + "def prepare_data(data, time_steps):\n", + " X, y = [], []\n", + " for i in range(len(data) - time_steps):\n", + " X.append(data[i:(i + time_steps), 0])\n", + " y.append(data[i + time_steps, 0])\n", + " return np.array(X), np.array(y)\n", + "\n", + "def hybrid_model(data, time_steps=60):\n", + " # Ensure data is numpy array\n", + " if isinstance(data, pd.DataFrame):\n", + " df = data.values\n", + " else:\n", + " df = np.array(data)\n", + " \n", + " df = df.reshape(-1, 1)\n", + "\n", + " df = pd.DataFrame(df).ffill().values\n", + "\n", + " # ARIMA model\n", + " model_auto = auto_arima(df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", + " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", + " error_action='ignore', suppress_warnings=True, stepwise=True)\n", + "\n", + " arima_model = ARIMA(df, order=model_auto.order)\n", + " arima_results = arima_model.fit()\n", + "\n", + " # Get ARIMA residuals\n", + " arima_residuals = df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", + " \n", + " # Prepare data for LSTM\n", + " scaler = MinMaxScaler()\n", + " residuals_scaled = scaler.fit_transform(arima_residuals)\n", + "\n", + " X, y = prepare_data(residuals_scaled, time_steps)\n", + " X = np.reshape(X, (X.shape[0], X.shape[1], 1))\n", + "\n", + " # LSTM model\n", + " lstm_model = Sequential([\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", + " LSTM(units=50),\n", + " Dense(units=1)\n", + " ])\n", + " lstm_model.compile(optimizer='adam', loss='mean_squared_error')\n", + " lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)\n", + "\n", + " # Make hybrid prediction\n", + " last_60_days = residuals_scaled[-60:]\n", + " X_test = np.array([last_60_days])\n", + " X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))\n", + "\n", + " lstm_prediction = lstm_model.predict(X_test)\n", + " lstm_prediction = scaler.inverse_transform(lstm_prediction)\n", + "\n", + " arima_forecast = arima_results.forecast(steps=1)\n", + "\n", + " hybrid_prediction = arima_forecast + lstm_prediction[0][0]\n", + "\n", + " return hybrid_prediction[0]\n", + "\n", + "# Example usage with custom data\n", + "# Assuming you have a CSV file named 'my_stock_data.csv' with a 'Close' column\n", + "custom_data = pd.read_csv('../Data/SBI Train data.csv')\n", + "close_prices = custom_data['Close']\n", + "\n", + "prediction = hybrid_model(close_prices)\n", + "print(f\"Hybrid model prediction for next day closing price: ${prediction:.2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Performing stepwise search to minimize aic\n", + " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30240.523, Time=2.25 sec\n", + " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30272.252, Time=0.22 sec\n", + " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30243.723, Time=0.58 sec\n", + " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30241.768, Time=0.82 sec\n", + " ARIMA(0,1,0)(0,0,0)[0] : AIC=30270.982, Time=0.15 sec\n", + " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30234.621, Time=2.72 sec\n", + " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30239.276, Time=0.84 sec\n", + " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30236.607, Time=4.21 sec\n", + " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30236.577, Time=6.49 sec\n", + " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30234.958, Time=4.04 sec\n", + " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30240.618, Time=1.13 sec\n", + " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30237.920, Time=6.01 sec\n", + " ARIMA(2,1,1)(0,0,0)[0] : AIC=30233.414, Time=1.07 sec\n", + " ARIMA(1,1,1)(0,0,0)[0] : AIC=30239.179, Time=1.18 sec\n", + " ARIMA(2,1,0)(0,0,0)[0] : AIC=30237.952, Time=0.35 sec\n", + " ARIMA(3,1,1)(0,0,0)[0] : AIC=30235.401, Time=1.82 sec\n", + " ARIMA(2,1,2)(0,0,0)[0] : AIC=30235.372, Time=2.42 sec\n", + " ARIMA(1,1,0)(0,0,0)[0] : AIC=30242.355, Time=0.29 sec\n", + " ARIMA(1,1,2)(0,0,0)[0] : AIC=30233.748, Time=1.49 sec\n", + " ARIMA(3,1,0)(0,0,0)[0] : AIC=30239.309, Time=0.54 sec\n", + " ARIMA(3,1,2)(0,0,0)[0] : AIC=30236.712, Time=2.61 sec\n", + "\n", + "Best model: ARIMA(2,1,1)(0,0,0)[0] \n", + "Total fit time: 41.259 seconds\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 499ms/step\n", + "Hybrid model prediction for next day closing price: $245.04\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from pmdarima import auto_arima\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", + "\n", + "def prepare_data(data, time_steps):\n", + " X, y = [], []\n", + " for i in range(len(data) - time_steps):\n", + " X.append(data[i:(i + time_steps), 0])\n", + " y.append(data[i + time_steps, 0])\n", + " return np.array(X), np.array(y)\n", + "\n", + "def hybrid_model(data, time_steps=60):\n", + " # Ensure data is numpy array\n", + " if isinstance(data, pd.Series):\n", + " df = data.values\n", + " elif isinstance(data, pd.DataFrame):\n", + " df = data.values\n", + " else:\n", + " df = np.array(data)\n", + " \n", + " df = df.reshape(-1, 1)\n", + "\n", + " df = pd.DataFrame(df).ffill().values\n", + "\n", + " # ARIMA model\n", + " model_auto = auto_arima(df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", + " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", + " error_action='ignore', suppress_warnings=True, stepwise=True)\n", + "\n", + " arima_model = ARIMA(df, order=model_auto.order)\n", + " arima_results = arima_model.fit()\n", + "\n", + " # Get ARIMA residuals\n", + " arima_residuals = df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", + "\n", + " # Prepare data for LSTM\n", + " scaler = MinMaxScaler()\n", + " residuals_scaled = scaler.fit_transform(arima_residuals)\n", + "\n", + " X, y = prepare_data(residuals_scaled, time_steps)\n", + " X = np.reshape(X, (X.shape[0], X.shape[1], 1))\n", + "\n", + " # LSTM model\n", + " lstm_model = Sequential([\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", + " LSTM(units=50),\n", + " Dense(units=1)\n", + " ])\n", + " lstm_model.compile(optimizer='adam', loss='mean_squared_error')\n", + " lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)\n", + "\n", + " # Make hybrid prediction\n", + " last_60_days = residuals_scaled[-60:]\n", + " X_test = np.array([last_60_days])\n", + " X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))\n", + "\n", + " lstm_prediction = lstm_model.predict(X_test)\n", + " lstm_prediction = scaler.inverse_transform(lstm_prediction)\n", + "\n", + " arima_forecast = arima_results.forecast(steps=1)\n", + "\n", + " hybrid_prediction = arima_forecast + lstm_prediction[0][0]\n", + "\n", + " return hybrid_prediction[0]\n", + "\n", + "# Example usage with custom data\n", + "custom_data = pd.read_csv('../Data/SBI Train data.csv')\n", + "close_prices = custom_data['Close']\n", + "\n", + "prediction = hybrid_model(close_prices)\n", + "print(f\"Hybrid model prediction for next day closing price: ${prediction:.2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from pmdarima import auto_arima\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", + "\n", + "def prepare_data(data, time_steps):\n", + " X, y = [], []\n", + " for i in range(len(data) - time_steps):\n", + " X.append(data[i:(i + time_steps), 0])\n", + " y.append(data[i + time_steps, 0])\n", + " return np.array(X), np.array(y)\n", + "\n", + "def hybrid_model(train_data, test_data, time_steps=60):\n", + " # Ensure data is numpy array\n", + " if isinstance(train_data, pd.Series):\n", + " train_df = train_data.values\n", + " if isinstance(test_data, pd.DataFrame):\n", + " test_df = train_data.values\n", + " else:\n", + " train_df = np.array(train_data)\n", + " \n", + " train_df = train_df.reshape(-1, 1)\n", + "\n", + " train_df = pd.DataFrame(train_df).ffill().values\n", + "\n", + " # ARIMA model\n", + " model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", + " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", + " error_action='ignore', suppress_warnings=True, stepwise=True)\n", + "\n", + " arima_model = ARIMA(train_df, order=model_auto.order)\n", + " arima_results = arima_model.fit()\n", + "\n", + " # Get ARIMA residuals\n", + " arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", + "\n", + " # Prepare data for LSTM\n", + " scaler = MinMaxScaler()\n", + " residuals_scaled = scaler.fit_transform(arima_residuals)\n", + "\n", + " X, y = prepare_data(residuals_scaled, time_steps)\n", + " X = np.reshape(X, (X.shape[0], X.shape[1], 1))\n", + "\n", + " # LSTM model\n", + " lstm_model = Sequential([\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", + " LSTM(units=50),\n", + " Dense(units=1)\n", + " ])\n", + " lstm_model.compile(optimizer='adam', loss='mean_squared_error')\n", + " lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)\n", + "\n", + " # Make predictions for test data\n", + " predictions = []\n", + " test_data = np.array(test_data).reshape(-1, 1)\n", + " combined_data = np.vstack((train_df, test_data))\n", + "\n", + " for i in range(len(test_data)):\n", + " # ARIMA prediction\n", + " arima_forecast = arima_results.forecast(steps=1)\n", + "\n", + " # LSTM prediction\n", + " last_60_days = scaler.transform(combined_data[-(time_steps+1):-1])\n", + " X_test = np.array([last_60_days])\n", + " X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))\n", + " lstm_prediction = lstm_model.predict(X_test)\n", + " lstm_prediction = scaler.inverse_transform(lstm_prediction)\n", + "\n", + " # Combine predictions\n", + " hybrid_prediction = arima_forecast + lstm_prediction[0][0]\n", + " predictions.append(hybrid_prediction[0])\n", + "\n", + " # Update ARIMA model\n", + " arima_results = arima_model.append(test_data[i]).fit()\n", + "\n", + " return np.array(predictions)\n", + "\n", + "# Load and prepare data\n", + "train_data = pd.read_csv('../Data/SBI Train data.csv')\n", + "test_data = pd.read_csv('../Data/SBI Test data.csv')\n", + "\n", + "train_close_prices = train_data['Close']\n", + "test_close_prices = test_data['Close']\n", + "\n", + "# Make predictions\n", + "predictions = hybrid_model(train_close_prices, test_close_prices)\n", + "\n", + "# Calculate accuracy metrics\n", + "mae = mean_absolute_error(test_close_prices, predictions)\n", + "rmse = np.sqrt(mean_squared_error(test_close_prices, predictions))\n", + "\n", + "print(f\"Mean Absolute Error: ${mae:.2f}\")\n", + "print(f\"Root Mean Squared Error: ${rmse:.2f}\")\n", + "\n", + "# You can also calculate percentage error\n", + "mape = np.mean(np.abs((test_close_prices - predictions) / test_close_prices)) * 100\n", + "print(f\"Mean Absolute Percentage Error: {mape:.2f}%\")\n", + "\n", + "# Plot actual vs predicted prices\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.figure(figsize=(12,6))\n", + "plt.plot(test_data['Date'], test_close_prices, label='Actual Prices')\n", + "plt.plot(test_data['Date'], predictions, label='Predicted Prices')\n", + "plt.title('Actual vs Predicted Stock Prices')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Price')\n", + "plt.legend()\n", + "plt.xticks(rotation=45)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from pmdarima import auto_arima\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", + "\n", + "def prepare_data(data, time_steps):\n", + " X, y = [], []\n", + " for i in range(len(data) - time_steps):\n", + " X.append(data[i:(i + time_steps), 0])\n", + " y.append(data[i + time_steps, 0])\n", + " return np.array(X), np.array(y)\n", + "\n", + "def hybrid_model(train_data, test_data, time_steps=60):\n", + " # Ensure data is numpy array\n", + " if isinstance(train_data, pd.Series):\n", + " train_df = train_data.values\n", + " elif isinstance(train_data, pd.DataFrame):\n", + " train_df = train_data.values\n", + " else:\n", + " train_df = np.array(train_data)\n", + " \n", + " train_df = train_df.reshape(-1, 1)\n", + " train_df = pd.DataFrame(train_df).ffill().values\n", + "\n", + " # ARIMA model\n", + " model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", + " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", + " error_action='ignore', suppress_warnings=True, stepwise=True)\n", + "\n", + " arima_model = ARIMA(train_df, order=model_auto.order)\n", + " arima_results = arima_model.fit()\n", + "\n", + " # Get ARIMA residuals\n", + " arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", + "\n", + " # Prepare data for LSTM\n", + " scaler = MinMaxScaler()\n", + " residuals_scaled = scaler.fit_transform(arima_residuals)\n", + "\n", + " X, y = prepare_data(residuals_scaled, time_steps)\n", + " X = np.reshape(X, (X.shape[0], X.shape[1], 1))\n", + "\n", + " # LSTM model\n", + " lstm_model = Sequential([\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", + " LSTM(units=50),\n", + " Dense(units=1)\n", + " ])\n", + " lstm_model.compile(optimizer='adam', loss='mean_squared_error')\n", + " lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)\n", + "\n", + " # Make predictions for test data\n", + " predictions = []\n", + " test_data = np.array(test_data).reshape(-1, 1)\n", + " combined_data = np.vstack((train_df, test_data))\n", + "\n", + " for i in range(len(test_data)):\n", + " # ARIMA prediction\n", + " arima_forecast = arima_results.forecast(steps=1)\n", + "\n", + " # LSTM prediction\n", + " last_60_days = scaler.transform(combined_data[-(time_steps+1):-1])\n", + " X_test = np.array([last_60_days])\n", + " X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))\n", + " lstm_prediction = lstm_model.predict(X_test)\n", + " lstm_prediction = scaler.inverse_transform(lstm_prediction)\n", + "\n", + " # Combine predictions\n", + " hybrid_prediction = arima_forecast + lstm_prediction[0][0]\n", + " predictions.append(hybrid_prediction[0])\n", + "\n", + " # Update ARIMA model\n", + " arima_results = arima_model.append(test_data[i]).fit()\n", + "\n", + " return np.array(predictions)\n", + "\n", + "# Load and prepare data\n", + "train_data = pd.read_csv('../Data/SBI Train data.csv')\n", + "test_data = pd.read_csv('../Data/SBI Test data.csv')\n", + "\n", + "train_close_prices = train_data['Close']\n", + "test_close_prices = test_data['Close']\n", + "\n", + "# Make predictions\n", + "predictions = hybrid_model(train_close_prices, test_close_prices)\n", + "\n", + "# Calculate accuracy metrics\n", + "mae = mean_absolute_error(test_close_prices, predictions)\n", + "rmse = np.sqrt(mean_squared_error(test_close_prices, predictions))\n", + "\n", + "print(f\"Mean Absolute Error: ${mae:.2f}\")\n", + "print(f\"Root Mean Squared Error: ${rmse:.2f}\")\n", + "\n", + "# You can also calculate percentage error\n", + "mape = np.mean(np.abs((test_close_prices - predictions) / test_close_prices)) * 100\n", + "print(f\"Mean Absolute Percentage Error: {mape:.2f}%\")\n", + "\n", + "# Plot actual vs predicted prices\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.figure(figsize=(12,6))\n", + "plt.plot(test_data['Date'], test_close_prices, label='Actual Prices')\n", + "plt.plot(test_data['Date'], predictions, label='Predicted Prices')\n", + "plt.title('Actual vs Predicted Stock Prices')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Price')\n", + "plt.legend()\n", + "plt.xticks(rotation=45)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from pmdarima import auto_arima\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from tensorflow.keras.models import Sequential, load_model\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", + "import joblib\n", + "import os\n", + "\n", + "def prepare_data(data, time_steps):\n", + " X, y = [], []\n", + " for i in range(len(data) - time_steps):\n", + " X.append(data[i:(i + time_steps), 0])\n", + " y.append(data[i + time_steps, 0])\n", + " return np.array(X), np.array(y)\n", + "\n", + "def create_hybrid_model(train_data, time_steps=60):\n", + " # Ensure data is numpy array\n", + " if isinstance(train_data, pd.Series):\n", + " train_df = train_data.values\n", + " elif isinstance(train_data, pd.DataFrame):\n", + " train_df = train_data.values\n", + " else:\n", + " train_df = np.array(train_data)\n", + " \n", + " train_df = train_df.reshape(-1, 1)\n", + " train_df = pd.DataFrame(train_df).ffill().values\n", + "\n", + " # ARIMA model\n", + " model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", + " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", + " error_action='ignore', suppress_warnings=True, stepwise=True)\n", + "\n", + " arima_model = ARIMA(train_df, order=model_auto.order)\n", + " arima_results = arima_model.fit()\n", + "\n", + " # Get ARIMA residuals\n", + " arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", + "\n", + " # Prepare data for LSTM\n", + " scaler = MinMaxScaler()\n", + " residuals_scaled = scaler.fit_transform(arima_residuals)\n", + "\n", + " X, y = prepare_data(residuals_scaled, time_steps)\n", + " X = np.reshape(X, (X.shape[0], X.shape[1], 1))\n", + "\n", + " # LSTM model\n", + " lstm_model = Sequential([\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", + " LSTM(units=50),\n", + " Dense(units=1)\n", + " ])\n", + " lstm_model.compile(optimizer='adam', loss='mean_squared_error')\n", + " lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)\n", + "\n", + " return arima_results, lstm_model, scaler\n", + "\n", + "def save_model(arima_results, lstm_model, scaler, folder_path):\n", + " if not os.path.exists(folder_path):\n", + " os.makedirs(folder_path)\n", + " \n", + " # Save ARIMA model\n", + " joblib.dump(arima_results, os.path.join(folder_path, 'arima_model.pkl'))\n", + " \n", + " # Save LSTM model\n", + " lstm_model.save(os.path.join(folder_path, 'lstm_model.h5'))\n", + " \n", + " # Save scaler\n", + " joblib.dump(scaler, os.path.join(folder_path, 'scaler.pkl'))\n", + "\n", + "def load_model(folder_path):\n", + " # Load ARIMA model\n", + " arima_results = joblib.load(os.path.join(folder_path, 'arima_model.pkl'))\n", + " \n", + " # Load LSTM model\n", + " lstm_model = load_model(os.path.join(folder_path, 'lstm_model.h5'))\n", + " \n", + " # Load scaler\n", + " scaler = joblib.load(os.path.join(folder_path, 'scaler.pkl'))\n", + " \n", + " return arima_results, lstm_model, scaler\n", + "\n", + "def make_predictions(arima_results, lstm_model, scaler, test_data, time_steps=60):\n", + " predictions = []\n", + " test_data = np.array(test_data).reshape(-1, 1)\n", + "\n", + " for i in range(len(test_data)):\n", + " # ARIMA prediction\n", + " arima_forecast = arima_results.forecast(steps=1)\n", + "\n", + " # LSTM prediction\n", + " last_60_days = scaler.transform(test_data[i:i+time_steps])\n", + " X_test = np.array([last_60_days])\n", + " X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))\n", + " lstm_prediction = lstm_model.predict(X_test)\n", + " lstm_prediction = scaler.inverse_transform(lstm_prediction)\n", + "\n", + " # Combine predictions\n", + " hybrid_prediction = arima_forecast + lstm_prediction[0][0]\n", + " predictions.append(hybrid_prediction[0])\n", + "\n", + " # Update ARIMA model\n", + " arima_results = arima_results.append(test_data[i])\n", + "\n", + " return np.array(predictions)\n", + "\n", + "# Example usage\n", + "if __name__ == \"__main__\":\n", + " # Load data\n", + " train_data = pd.read_csv('../Data/SBI Train data.csv')\n", + " test_data = pd.read_csv('../Data/SBI Test data.csv')\n", + "\n", + " train_close_prices = train_data['Close']\n", + " test_close_prices = test_data['Close']\n", + "\n", + " # Create and save the model\n", + " arima_results, lstm_model, scaler = create_hybrid_model(train_close_prices)\n", + " save_model(arima_results, lstm_model, scaler, 'saved_model')\n", + "\n", + " # Later, load the model and make predictions\n", + " loaded_arima, loaded_lstm, loaded_scaler = load_model('saved_model')\n", + " predictions = make_predictions(loaded_arima, loaded_lstm, loaded_scaler, test_close_prices)\n", + "\n", + " # Calculate accuracy metrics\n", + " mae = mean_absolute_error(test_close_prices, predictions)\n", + " rmse = np.sqrt(mean_squared_error(test_close_prices, predictions))\n", + " mape = np.mean(np.abs((test_close_prices - predictions) / test_close_prices)) * 100\n", + "\n", + " print(f\"Mean Absolute Error: ${mae:.2f}\")\n", + " print(f\"Root Mean Squared Error: ${rmse:.2f}\")\n", + " print(f\"Mean Absolute Percentage Error: {mape:.2f}%\")\n", + "\n", + " # Plot results\n", + " import matplotlib.pyplot as plt\n", + "\n", + " plt.figure(figsize=(12,6))\n", + " plt.plot(test_data['Date'], test_close_prices, label='Actual Prices')\n", + " plt.plot(test_data['Date'], predictions, label='Predicted Prices')\n", + " plt.title('Actual vs Predicted Stock Prices')\n", + " plt.xlabel('Date')\n", + " plt.ylabel('Price')\n", + " plt.legend()\n", + " plt.xticks(rotation=45)\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from pmdarima import auto_arima\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from tensorflow.keras.models import Sequential, load_model\n", + "from tensorflow.keras.layers import Dense, LSTM\n", + "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", + "import joblib # For saving the ARIMA model\n", + "import os\n", + "\n", + "# Data preparation\n", + "def prepare_data(data, time_steps):\n", + " X, y = [], []\n", + " for i in range(len(data) - time_steps):\n", + " X.append(data[i:(i + time_steps), 0])\n", + " y.append(data[i + time_steps, 0])\n", + " return np.array(X), np.array(y)\n", + "\n", + "# Hybrid ARIMA-LSTM Model\n", + "def hybrid_model(train_data, test_data, time_steps=60, model_dir='./model'):\n", + " # Ensure data is a numpy array\n", + " train_df = np.array(train_data).reshape(-1, 1)\n", + " train_df = pd.DataFrame(train_df).ffill().values\n", + "\n", + " # Create a directory to save models if it doesn't exist\n", + " if not os.path.exists(model_dir):\n", + " os.makedirs(model_dir)\n", + "\n", + " # ARIMA Model\n", + " arima_model_path = os.path.join(model_dir, 'arima_model.pkl')\n", + " if not os.path.exists(arima_model_path):\n", + " model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", + " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", + " error_action='ignore', suppress_warnings=True, stepwise=True)\n", + "\n", + " arima_model = ARIMA(train_df, order=model_auto.order)\n", + " arima_results = arima_model.fit()\n", + " # Save ARIMA model\n", + " joblib.dump(arima_results, arima_model_path)\n", + " else:\n", + " arima_results = joblib.load(arima_model_path)\n", + "\n", + " # Get ARIMA residuals\n", + " arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", + "\n", + " # Prepare data for LSTM\n", + " scaler = MinMaxScaler()\n", + " residuals_scaled = scaler.fit_transform(arima_residuals)\n", + "\n", + " X, y = prepare_data(residuals_scaled, time_steps)\n", + " X = np.reshape(X, (X.shape[0], X.shape[1], 1))\n", + "\n", + " # LSTM Model\n", + " lstm_model_path = os.path.join(model_dir, 'lstm_model.keras') # Updated file extension\n", + " if not os.path.exists(lstm_model_path):\n", + " lstm_model = Sequential([\n", + " LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),\n", + " LSTM(units=50),\n", + " Dense(units=1)\n", + " ])\n", + " lstm_model.compile(optimizer='adam', loss='mean_squared_error')\n", + "\n", + " # Early stopping and model checkpoint\n", + " early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)\n", + " model_checkpoint = ModelCheckpoint(lstm_model_path, save_best_only=True, monitor='loss')\n", + "\n", + " # Train LSTM model\n", + " lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=1, callbacks=[early_stopping, model_checkpoint])\n", + "\n", + " else:\n", + " lstm_model = load_model(lstm_model_path)\n", + "\n", + " # Make predictions for test data\n", + " predictions = []\n", + " test_data = np.array(test_data).reshape(-1, 1)\n", + " combined_data = np.vstack((train_df, test_data))\n", + "\n", + " for i in range(len(test_data)):\n", + " # ARIMA prediction\n", + " arima_forecast = arima_results.forecast(steps=1)\n", + "\n", + " # LSTM prediction\n", + " last_60_days = scaler.transform(combined_data[-(time_steps+1):-1])\n", + " X_test = np.array([last_60_days])\n", + " X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))\n", + " lstm_prediction = lstm_model.predict(X_test)\n", + " lstm_prediction = scaler.inverse_transform(lstm_prediction)\n", + "\n", + " # Combine predictions\n", + " hybrid_prediction = arima_forecast + lstm_prediction[0][0]\n", + " predictions.append(hybrid_prediction[0])\n", + "\n", + " # Update ARIMA model with test data\n", + " arima_results = arima_model.append(test_data[i]).fit()\n", + "\n", + " return np.array(predictions)\n", + "\n", + "# Load and prepare data\n", + "train_data = pd.read_csv('../Data/SBI Train data.csv')\n", + "test_data = pd.read_csv('../Data/SBI Test data.csv')\n", + "\n", + "train_close_prices = train_data['Close']\n", + "test_close_prices = test_data['Close']\n", + "\n", + "# Make predictions\n", + "predictions = hybrid_model(train_close_prices, test_close_prices)\n", + "\n", + "# Calculate accuracy metrics\n", + "mae = mean_absolute_error(test_close_prices, predictions)\n", + "rmse = np.sqrt(mean_squared_error(test_close_prices, predictions))\n", + "\n", + "print(f\"Mean Absolute Error: ${mae:.2f}\")\n", + "print(f\"Root Mean Squared Error: ${rmse:.2f}\")\n", + "\n", + "# Calculate Mean Absolute Percentage Error (MAPE)\n", + "mape = np.mean(np.abs((test_close_prices - predictions) / test_close_prices)) * 100\n", + "print(f\"Mean Absolute Percentage Error: {mape:.2f}%\")\n", + "\n", + "# Plot actual vs predicted prices\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.figure(figsize=(12,6))\n", + "plt.plot(test_data['Date'], test_close_prices, label='Actual Prices')\n", + "plt.plot(test_data['Date'], predictions, label='Predicted Prices')\n", + "plt.title('Actual vs Predicted Stock Prices')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Price')\n", + "plt.legend()\n", + "plt.xticks(rotation=45)\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/ARIMA/hybrid.ipynb b/ARIMA/hybrid.ipynb index 491551d..e5870cb 100644 --- a/ARIMA/hybrid.ipynb +++ b/ARIMA/hybrid.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -10,42 +10,32 @@ "output_type": "stream", "text": [ "Performing stepwise search to minimize aic\n", - " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30202.237, Time=0.62 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30233.097, Time=0.05 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30205.175, Time=0.16 sec\n", - " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30203.304, Time=0.22 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] : AIC=30231.827, Time=0.04 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30196.421, Time=0.84 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30201.003, Time=0.20 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30198.399, Time=1.24 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30198.353, Time=1.94 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30196.789, Time=1.17 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30202.291, Time=0.30 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30199.886, Time=1.15 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] : AIC=30195.213, Time=0.28 sec\n", - " ARIMA(1,1,1)(0,0,0)[0] : AIC=30200.893, Time=0.38 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] : AIC=30199.679, Time=0.09 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] : AIC=30197.192, Time=0.56 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] : AIC=30197.148, Time=0.97 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] : AIC=30203.808, Time=0.06 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] : AIC=30195.578, Time=0.44 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] : AIC=30200.982, Time=0.13 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] : AIC=30198.679, Time=0.51 sec\n", + " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30240.523, Time=2.28 sec\n", + " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30272.252, Time=0.19 sec\n", + " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30243.723, Time=0.54 sec\n", + " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30241.768, Time=0.77 sec\n", + " ARIMA(0,1,0)(0,0,0)[0] : AIC=30270.982, Time=0.17 sec\n", + " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30234.621, Time=2.66 sec\n", + " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30239.276, Time=0.76 sec\n", + " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30236.607, Time=4.12 sec\n", + " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30236.577, Time=6.50 sec\n", + " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30234.958, Time=3.86 sec\n", + " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30240.618, Time=1.05 sec\n", + " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30237.920, Time=5.87 sec\n", + " ARIMA(2,1,1)(0,0,0)[0] : AIC=30233.414, Time=0.94 sec\n", + " ARIMA(1,1,1)(0,0,0)[0] : AIC=30239.179, Time=1.16 sec\n", + " ARIMA(2,1,0)(0,0,0)[0] : AIC=30237.952, Time=0.39 sec\n", + " ARIMA(3,1,1)(0,0,0)[0] : AIC=30235.401, Time=1.76 sec\n", + " ARIMA(2,1,2)(0,0,0)[0] : AIC=30235.372, Time=2.71 sec\n", + " ARIMA(1,1,0)(0,0,0)[0] : AIC=30242.355, Time=0.28 sec\n", + " ARIMA(1,1,2)(0,0,0)[0] : AIC=30233.748, Time=1.51 sec\n", + " ARIMA(3,1,0)(0,0,0)[0] : AIC=30239.309, Time=0.48 sec\n", + " ARIMA(3,1,2)(0,0,0)[0] : AIC=30236.712, Time=2.50 sec\n", "\n", "Best model: ARIMA(2,1,1)(0,0,0)[0] \n", - "Total fit time: 11.362 seconds\n" - ] - }, - { - "ename": "AttributeError", - "evalue": "'numpy.ndarray' object has no attribute 'values'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[2], line 71\u001b[0m\n\u001b[0;32m 68\u001b[0m custom_data \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../Data/SBI Train data.csv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 69\u001b[0m close_prices \u001b[38;5;241m=\u001b[39m custom_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mClose\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m---> 71\u001b[0m prediction \u001b[38;5;241m=\u001b[39m \u001b[43mhybrid_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mclose_prices\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHybrid model prediction for next day closing price: $\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mprediction\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.2f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "Cell \u001b[1;32mIn[2], line 34\u001b[0m, in \u001b[0;36mhybrid_model\u001b[1;34m(data, time_steps)\u001b[0m\n\u001b[0;32m 31\u001b[0m arima_results \u001b[38;5;241m=\u001b[39m arima_model\u001b[38;5;241m.\u001b[39mfit()\n\u001b[0;32m 33\u001b[0m \u001b[38;5;66;03m# Get ARIMA residuals\u001b[39;00m\n\u001b[1;32m---> 34\u001b[0m arima_residuals \u001b[38;5;241m=\u001b[39m df \u001b[38;5;241m-\u001b[39m \u001b[43marima_results\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfittedvalues\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[38;5;241m.\u001b[39mreshape(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m 36\u001b[0m \u001b[38;5;66;03m# Prepare data for LSTM\u001b[39;00m\n\u001b[0;32m 37\u001b[0m scaler \u001b[38;5;241m=\u001b[39m MinMaxScaler()\n", - "\u001b[1;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'values'" + "Total fit time: 40.526 seconds\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 601ms/step\n", + "Hybrid model prediction for next day closing price: $244.92\n" ] } ], @@ -56,7 +46,7 @@ "from pmdarima import auto_arima\n", "from sklearn.preprocessing import MinMaxScaler\n", "from tensorflow.keras.models import Sequential\n", - "from tensorflow.keras.layers import Dense, LSTM\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", "\n", "def prepare_data(data, time_steps):\n", " X, y = [], []\n", @@ -74,6 +64,8 @@ " \n", " df = df.reshape(-1, 1)\n", "\n", + " df = pd.DataFrame(df).ffill().values\n", + "\n", " # ARIMA model\n", " model_auto = auto_arima(df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", @@ -83,8 +75,9 @@ " arima_results = arima_model.fit()\n", "\n", " # Get ARIMA residuals\n", - " arima_residuals = df - arima_results.fittedvalues.values.reshape(-1, 1)\n", - "\n", + " arima_residuals = df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", + " \n", " # Prepare data for LSTM\n", " scaler = MinMaxScaler()\n", " residuals_scaled = scaler.fit_transform(arima_residuals)\n", @@ -94,7 +87,8 @@ "\n", " # LSTM model\n", " lstm_model = Sequential([\n", - " LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", " LSTM(units=50),\n", " Dense(units=1)\n", " ])\n", @@ -126,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -134,46 +128,32 @@ "output_type": "stream", "text": [ "Performing stepwise search to minimize aic\n", - " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30202.237, Time=1.22 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30233.097, Time=0.09 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30205.175, Time=0.55 sec\n", - " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30203.304, Time=0.46 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] : AIC=30231.827, Time=0.06 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30196.421, Time=1.17 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30201.003, Time=0.20 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30198.399, Time=1.24 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30198.353, Time=1.94 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30196.789, Time=1.18 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30202.291, Time=0.31 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30199.886, Time=1.17 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] : AIC=30195.213, Time=0.31 sec\n", - " ARIMA(1,1,1)(0,0,0)[0] : AIC=30200.893, Time=0.39 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] : AIC=30199.679, Time=0.08 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] : AIC=30197.192, Time=0.57 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] : AIC=30197.148, Time=0.97 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] : AIC=30203.808, Time=0.07 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] : AIC=30195.578, Time=0.45 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] : AIC=30200.982, Time=0.14 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] : AIC=30198.679, Time=0.55 sec\n", + " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30240.523, Time=2.25 sec\n", + " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30272.252, Time=0.22 sec\n", + " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30243.723, Time=0.58 sec\n", + " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30241.768, Time=0.82 sec\n", + " ARIMA(0,1,0)(0,0,0)[0] : AIC=30270.982, Time=0.15 sec\n", + " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30234.621, Time=2.72 sec\n", + " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30239.276, Time=0.84 sec\n", + " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30236.607, Time=4.21 sec\n", + " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30236.577, Time=6.49 sec\n", + " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30234.958, Time=4.04 sec\n", + " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30240.618, Time=1.13 sec\n", + " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30237.920, Time=6.01 sec\n", + " ARIMA(2,1,1)(0,0,0)[0] : AIC=30233.414, Time=1.07 sec\n", + " ARIMA(1,1,1)(0,0,0)[0] : AIC=30239.179, Time=1.18 sec\n", + " ARIMA(2,1,0)(0,0,0)[0] : AIC=30237.952, Time=0.35 sec\n", + " ARIMA(3,1,1)(0,0,0)[0] : AIC=30235.401, Time=1.82 sec\n", + " ARIMA(2,1,2)(0,0,0)[0] : AIC=30235.372, Time=2.42 sec\n", + " ARIMA(1,1,0)(0,0,0)[0] : AIC=30242.355, Time=0.29 sec\n", + " ARIMA(1,1,2)(0,0,0)[0] : AIC=30233.748, Time=1.49 sec\n", + " ARIMA(3,1,0)(0,0,0)[0] : AIC=30239.309, Time=0.54 sec\n", + " ARIMA(3,1,2)(0,0,0)[0] : AIC=30236.712, Time=2.61 sec\n", "\n", "Best model: ARIMA(2,1,1)(0,0,0)[0] \n", - "Total fit time: 13.125 seconds\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", - " super().__init__(**kwargs)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 148ms/step\n", - "Hybrid model prediction for next day closing price: $245.77\n" + "Total fit time: 41.259 seconds\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 499ms/step\n", + "Hybrid model prediction for next day closing price: $245.04\n" ] } ], @@ -184,7 +164,7 @@ "from pmdarima import auto_arima\n", "from sklearn.preprocessing import MinMaxScaler\n", "from tensorflow.keras.models import Sequential\n", - "from tensorflow.keras.layers import Dense, LSTM\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", "\n", "def prepare_data(data, time_steps):\n", " X, y = [], []\n", @@ -204,6 +184,8 @@ " \n", " df = df.reshape(-1, 1)\n", "\n", + " df = pd.DataFrame(df).ffill().values\n", + "\n", " # ARIMA model\n", " model_auto = auto_arima(df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", @@ -214,6 +196,7 @@ "\n", " # Get ARIMA residuals\n", " arima_residuals = df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", "\n", " # Prepare data for LSTM\n", " scaler = MinMaxScaler()\n", @@ -224,7 +207,8 @@ "\n", " # LSTM model\n", " lstm_model = Sequential([\n", - " LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", " LSTM(units=50),\n", " Dense(units=1)\n", " ])\n", @@ -255,68 +239,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Performing stepwise search to minimize aic\n", - " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30202.237, Time=0.62 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30233.097, Time=0.05 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30205.175, Time=0.15 sec\n", - " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30203.304, Time=0.21 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] : AIC=30231.827, Time=0.03 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30196.421, Time=0.85 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30201.003, Time=0.21 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30198.399, Time=1.25 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30198.353, Time=1.96 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30196.789, Time=1.21 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30202.291, Time=0.32 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30199.886, Time=1.15 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] : AIC=30195.213, Time=0.29 sec\n", - " ARIMA(1,1,1)(0,0,0)[0] : AIC=30200.893, Time=0.43 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] : AIC=30199.679, Time=0.09 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] : AIC=30197.192, Time=0.58 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] : AIC=30197.148, Time=1.01 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] : AIC=30203.808, Time=0.06 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] : AIC=30195.578, Time=0.44 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] : AIC=30200.982, Time=0.15 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] : AIC=30198.679, Time=0.52 sec\n", - "\n", - "Best model: ARIMA(2,1,1)(0,0,0)[0] \n", - "Total fit time: 11.613 seconds\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", - " super().__init__(**kwargs)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 140ms/step\n" - ] - }, - { - "ename": "AttributeError", - "evalue": "'ARIMA' object has no attribute 'append'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[4], line 88\u001b[0m\n\u001b[0;32m 85\u001b[0m test_close_prices \u001b[38;5;241m=\u001b[39m test_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mClose\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m 87\u001b[0m \u001b[38;5;66;03m# Make predictions\u001b[39;00m\n\u001b[1;32m---> 88\u001b[0m predictions \u001b[38;5;241m=\u001b[39m \u001b[43mhybrid_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_close_prices\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_close_prices\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 90\u001b[0m \u001b[38;5;66;03m# Calculate accuracy metrics\u001b[39;00m\n\u001b[0;32m 91\u001b[0m mae \u001b[38;5;241m=\u001b[39m mean_absolute_error(test_close_prices, predictions)\n", - "Cell \u001b[1;32mIn[4], line 76\u001b[0m, in \u001b[0;36mhybrid_model\u001b[1;34m(train_data, test_data, time_steps)\u001b[0m\n\u001b[0;32m 73\u001b[0m predictions\u001b[38;5;241m.\u001b[39mappend(hybrid_prediction[\u001b[38;5;241m0\u001b[39m])\n\u001b[0;32m 75\u001b[0m \u001b[38;5;66;03m# Update ARIMA model\u001b[39;00m\n\u001b[1;32m---> 76\u001b[0m arima_results \u001b[38;5;241m=\u001b[39m \u001b[43marima_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mappend\u001b[49m(test_data[i])\u001b[38;5;241m.\u001b[39mfit()\n\u001b[0;32m 78\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray(predictions)\n", - "\u001b[1;31mAttributeError\u001b[0m: 'ARIMA' object has no attribute 'append'" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -324,7 +249,7 @@ "from pmdarima import auto_arima\n", "from sklearn.preprocessing import MinMaxScaler\n", "from tensorflow.keras.models import Sequential\n", - "from tensorflow.keras.layers import Dense, LSTM\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", "\n", "def prepare_data(data, time_steps):\n", @@ -338,13 +263,15 @@ " # Ensure data is numpy array\n", " if isinstance(train_data, pd.Series):\n", " train_df = train_data.values\n", - " elif isinstance(train_data, pd.DataFrame):\n", - " train_df = train_data.values\n", + " if isinstance(test_data, pd.DataFrame):\n", + " test_df = train_data.values\n", " else:\n", " train_df = np.array(train_data)\n", " \n", " train_df = train_df.reshape(-1, 1)\n", "\n", + " train_df = pd.DataFrame(train_df).ffill().values\n", + "\n", " # ARIMA model\n", " model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", " d=None, seasonal=False, start_P=0, D=0, trace=True,\n", @@ -355,6 +282,7 @@ "\n", " # Get ARIMA residuals\n", " arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", "\n", " # Prepare data for LSTM\n", " scaler = MinMaxScaler()\n", @@ -365,7 +293,8 @@ "\n", " # LSTM model\n", " lstm_model = Sequential([\n", - " LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", " LSTM(units=50),\n", " Dense(units=1)\n", " ])\n", @@ -435,68 +364,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Performing stepwise search to minimize aic\n", - " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30202.237, Time=0.63 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30233.097, Time=0.05 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30205.175, Time=0.17 sec\n", - " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30203.304, Time=0.24 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] : AIC=30231.827, Time=0.04 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30196.421, Time=0.93 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30201.003, Time=0.23 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30198.399, Time=1.30 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30198.353, Time=1.94 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30196.789, Time=1.22 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30202.291, Time=0.30 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30199.886, Time=1.15 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] : AIC=30195.213, Time=0.31 sec\n", - " ARIMA(1,1,1)(0,0,0)[0] : AIC=30200.893, Time=0.42 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] : AIC=30199.679, Time=0.10 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] : AIC=30197.192, Time=0.61 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] : AIC=30197.148, Time=1.00 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] : AIC=30203.808, Time=0.05 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] : AIC=30195.578, Time=0.45 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] : AIC=30200.982, Time=0.14 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] : AIC=30198.679, Time=0.54 sec\n", - "\n", - "Best model: ARIMA(2,1,1)(0,0,0)[0] \n", - "Total fit time: 11.839 seconds\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", - " super().__init__(**kwargs)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 134ms/step\n" - ] - }, - { - "ename": "AttributeError", - "evalue": "'ARIMA' object has no attribute 'append'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[5], line 88\u001b[0m\n\u001b[0;32m 85\u001b[0m test_close_prices \u001b[38;5;241m=\u001b[39m test_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mClose\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m 87\u001b[0m \u001b[38;5;66;03m# Make predictions\u001b[39;00m\n\u001b[1;32m---> 88\u001b[0m predictions \u001b[38;5;241m=\u001b[39m \u001b[43mhybrid_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_close_prices\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_close_prices\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 90\u001b[0m \u001b[38;5;66;03m# Calculate accuracy metrics\u001b[39;00m\n\u001b[0;32m 91\u001b[0m mae \u001b[38;5;241m=\u001b[39m mean_absolute_error(test_close_prices, predictions)\n", - "Cell \u001b[1;32mIn[5], line 76\u001b[0m, in \u001b[0;36mhybrid_model\u001b[1;34m(train_data, test_data, time_steps)\u001b[0m\n\u001b[0;32m 73\u001b[0m predictions\u001b[38;5;241m.\u001b[39mappend(hybrid_prediction[\u001b[38;5;241m0\u001b[39m])\n\u001b[0;32m 75\u001b[0m \u001b[38;5;66;03m# Update ARIMA model\u001b[39;00m\n\u001b[1;32m---> 76\u001b[0m arima_results \u001b[38;5;241m=\u001b[39m \u001b[43marima_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mappend\u001b[49m(test_data[i])\u001b[38;5;241m.\u001b[39mfit()\n\u001b[0;32m 78\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray(predictions)\n", - "\u001b[1;31mAttributeError\u001b[0m: 'ARIMA' object has no attribute 'append'" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -504,7 +374,7 @@ "from pmdarima import auto_arima\n", "from sklearn.preprocessing import MinMaxScaler\n", "from tensorflow.keras.models import Sequential\n", - "from tensorflow.keras.layers import Dense, LSTM\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", "\n", "def prepare_data(data, time_steps):\n", @@ -524,6 +394,7 @@ " train_df = np.array(train_data)\n", " \n", " train_df = train_df.reshape(-1, 1)\n", + " train_df = pd.DataFrame(train_df).ffill().values\n", "\n", " # ARIMA model\n", " model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", @@ -535,6 +406,7 @@ "\n", " # Get ARIMA residuals\n", " arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", "\n", " # Prepare data for LSTM\n", " scaler = MinMaxScaler()\n", @@ -545,7 +417,8 @@ "\n", " # LSTM model\n", " lstm_model = Sequential([\n", - " LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", " LSTM(units=50),\n", " Dense(units=1)\n", " ])\n", @@ -615,64 +488,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Performing stepwise search to minimize aic\n", - " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=30202.237, Time=0.59 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=30233.097, Time=0.06 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=30205.175, Time=0.16 sec\n", - " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=30203.304, Time=0.25 sec\n", - " ARIMA(0,1,0)(0,0,0)[0] : AIC=30231.827, Time=0.04 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=30196.421, Time=0.85 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=30201.003, Time=0.20 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=30198.399, Time=1.28 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=30198.353, Time=2.09 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=30196.789, Time=1.29 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=30202.291, Time=0.33 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=30199.886, Time=1.23 sec\n", - " ARIMA(2,1,1)(0,0,0)[0] : AIC=30195.213, Time=0.30 sec\n", - " ARIMA(1,1,1)(0,0,0)[0] : AIC=30200.893, Time=0.38 sec\n", - " ARIMA(2,1,0)(0,0,0)[0] : AIC=30199.679, Time=0.09 sec\n", - " ARIMA(3,1,1)(0,0,0)[0] : AIC=30197.192, Time=0.57 sec\n", - " ARIMA(2,1,2)(0,0,0)[0] : AIC=30197.148, Time=0.98 sec\n", - " ARIMA(1,1,0)(0,0,0)[0] : AIC=30203.808, Time=0.06 sec\n", - " ARIMA(1,1,2)(0,0,0)[0] : AIC=30195.578, Time=0.42 sec\n", - " ARIMA(3,1,0)(0,0,0)[0] : AIC=30200.982, Time=0.14 sec\n", - " ARIMA(3,1,2)(0,0,0)[0] : AIC=30198.679, Time=0.50 sec\n", - "\n", - "Best model: ARIMA(2,1,1)(0,0,0)[0] \n", - "Total fit time: 11.823 seconds\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", - " super().__init__(**kwargs)\n", - "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n" - ] - }, - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'saved_model\\\\lstm_model.h5\\\\arima_model.pkl'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[6], line 122\u001b[0m\n\u001b[0;32m 119\u001b[0m save_model(arima_results, lstm_model, scaler, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msaved_model\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 121\u001b[0m \u001b[38;5;66;03m# Later, load the model and make predictions\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m loaded_arima, loaded_lstm, loaded_scaler \u001b[38;5;241m=\u001b[39m \u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msaved_model\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 123\u001b[0m predictions \u001b[38;5;241m=\u001b[39m make_predictions(loaded_arima, loaded_lstm, loaded_scaler, test_close_prices)\n\u001b[0;32m 125\u001b[0m \u001b[38;5;66;03m# Calculate accuracy metrics\u001b[39;00m\n", - "Cell \u001b[1;32mIn[6], line 77\u001b[0m, in \u001b[0;36mload_model\u001b[1;34m(folder_path)\u001b[0m\n\u001b[0;32m 74\u001b[0m arima_results \u001b[38;5;241m=\u001b[39m joblib\u001b[38;5;241m.\u001b[39mload(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(folder_path, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124marima_model.pkl\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[0;32m 76\u001b[0m \u001b[38;5;66;03m# Load LSTM model\u001b[39;00m\n\u001b[1;32m---> 77\u001b[0m lstm_model \u001b[38;5;241m=\u001b[39m \u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfolder_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlstm_model.h5\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 79\u001b[0m \u001b[38;5;66;03m# Load scaler\u001b[39;00m\n\u001b[0;32m 80\u001b[0m scaler \u001b[38;5;241m=\u001b[39m joblib\u001b[38;5;241m.\u001b[39mload(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(folder_path, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscaler.pkl\u001b[39m\u001b[38;5;124m'\u001b[39m))\n", - "Cell \u001b[1;32mIn[6], line 74\u001b[0m, in \u001b[0;36mload_model\u001b[1;34m(folder_path)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model\u001b[39m(folder_path):\n\u001b[0;32m 73\u001b[0m \u001b[38;5;66;03m# Load ARIMA model\u001b[39;00m\n\u001b[1;32m---> 74\u001b[0m arima_results \u001b[38;5;241m=\u001b[39m \u001b[43mjoblib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfolder_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43marima_model.pkl\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 76\u001b[0m \u001b[38;5;66;03m# Load LSTM model\u001b[39;00m\n\u001b[0;32m 77\u001b[0m lstm_model \u001b[38;5;241m=\u001b[39m load_model(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(folder_path, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlstm_model.h5\u001b[39m\u001b[38;5;124m'\u001b[39m))\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\joblib\\numpy_pickle.py:579\u001b[0m, in \u001b[0;36mload\u001b[1;34m(filename, mmap_mode)\u001b[0m\n\u001b[0;32m 577\u001b[0m obj \u001b[38;5;241m=\u001b[39m _unpickle(fobj)\n\u001b[0;32m 578\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 579\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 580\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _read_fileobject(f, filename, mmap_mode) \u001b[38;5;28;01mas\u001b[39;00m fobj:\n\u001b[0;32m 581\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(fobj, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m 582\u001b[0m \u001b[38;5;66;03m# if the returned file object is a string, this means we\u001b[39;00m\n\u001b[0;32m 583\u001b[0m \u001b[38;5;66;03m# try to load a pickle file generated with an version of\u001b[39;00m\n\u001b[0;32m 584\u001b[0m \u001b[38;5;66;03m# Joblib so we load it with joblib compatibility function.\u001b[39;00m\n", - "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'saved_model\\\\lstm_model.h5\\\\arima_model.pkl'" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -680,7 +498,7 @@ "from pmdarima import auto_arima\n", "from sklearn.preprocessing import MinMaxScaler\n", "from tensorflow.keras.models import Sequential, load_model\n", - "from tensorflow.keras.layers import Dense, LSTM\n", + "from tensorflow.keras.layers import Dense, LSTM, Input\n", "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", "import joblib\n", "import os\n", @@ -702,6 +520,7 @@ " train_df = np.array(train_data)\n", " \n", " train_df = train_df.reshape(-1, 1)\n", + " train_df = pd.DataFrame(train_df).ffill().values\n", "\n", " # ARIMA model\n", " model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,\n", @@ -713,6 +532,7 @@ "\n", " # Get ARIMA residuals\n", " arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", "\n", " # Prepare data for LSTM\n", " scaler = MinMaxScaler()\n", @@ -723,7 +543,8 @@ "\n", " # LSTM model\n", " lstm_model = Sequential([\n", - " LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),\n", + " Input(shape=(X.shape[1], 1)),\n", + " LSTM(units=50, return_sequences=True), \n", " LSTM(units=50),\n", " Dense(units=1)\n", " ])\n", @@ -824,43 +645,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:5 out of the last 5 calls to .one_step_on_data_distributed at 0x000002DD2831BCE0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:5 out of the last 5 calls to .one_step_on_data_distributed at 0x000002DD2831BCE0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 146ms/step\n" - ] - }, - { - "ename": "UnboundLocalError", - "evalue": "cannot access local variable 'arima_model' where it is not associated with a value", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[9], line 107\u001b[0m\n\u001b[0;32m 104\u001b[0m test_close_prices \u001b[38;5;241m=\u001b[39m test_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mClose\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m 106\u001b[0m \u001b[38;5;66;03m# Make predictions\u001b[39;00m\n\u001b[1;32m--> 107\u001b[0m predictions \u001b[38;5;241m=\u001b[39m \u001b[43mhybrid_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_close_prices\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_close_prices\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 109\u001b[0m \u001b[38;5;66;03m# Calculate accuracy metrics\u001b[39;00m\n\u001b[0;32m 110\u001b[0m mae \u001b[38;5;241m=\u001b[39m mean_absolute_error(test_close_prices, predictions)\n", - "Cell \u001b[1;32mIn[9], line 95\u001b[0m, in \u001b[0;36mhybrid_model\u001b[1;34m(train_data, test_data, time_steps, model_dir)\u001b[0m\n\u001b[0;32m 92\u001b[0m predictions\u001b[38;5;241m.\u001b[39mappend(hybrid_prediction[\u001b[38;5;241m0\u001b[39m])\n\u001b[0;32m 94\u001b[0m \u001b[38;5;66;03m# Update ARIMA model with test data\u001b[39;00m\n\u001b[1;32m---> 95\u001b[0m arima_results \u001b[38;5;241m=\u001b[39m \u001b[43marima_model\u001b[49m\u001b[38;5;241m.\u001b[39mappend(test_data[i])\u001b[38;5;241m.\u001b[39mfit()\n\u001b[0;32m 97\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray(predictions)\n", - "\u001b[1;31mUnboundLocalError\u001b[0m: cannot access local variable 'arima_model' where it is not associated with a value" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -886,6 +673,7 @@ "def hybrid_model(train_data, test_data, time_steps=60, model_dir='./model'):\n", " # Ensure data is a numpy array\n", " train_df = np.array(train_data).reshape(-1, 1)\n", + " train_df = pd.DataFrame(train_df).ffill().values\n", "\n", " # Create a directory to save models if it doesn't exist\n", " if not os.path.exists(model_dir):\n", @@ -907,6 +695,7 @@ "\n", " # Get ARIMA residuals\n", " arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)\n", + " arima_residuals = np.nan_to_num(arima_residuals)\n", "\n", " # Prepare data for LSTM\n", " scaler = MinMaxScaler()\n", @@ -1006,7 +795,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1020,9 +809,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.12.4" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/ARIMA/saved_model/arima_model.pkl b/ARIMA/saved_model/arima_model.pkl new file mode 100644 index 0000000..abda231 Binary files /dev/null and b/ARIMA/saved_model/arima_model.pkl differ diff --git a/ARIMA/saved_model/lstm_model.h5 b/ARIMA/saved_model/lstm_model.h5 new file mode 100644 index 0000000..57f2d66 Binary files /dev/null and b/ARIMA/saved_model/lstm_model.h5 differ diff --git a/ARIMA/saved_model/scaler.pkl b/ARIMA/saved_model/scaler.pkl new file mode 100644 index 0000000..6cff6ce Binary files /dev/null and b/ARIMA/saved_model/scaler.pkl differ