"""Streamlit dashboard for exploring and modelling a stock-price CSV.

Recovered from the patch "Add files via upload" (commit 924b2b8,
2023-12-27), which created ``main.py``.

The uploaded CSV must have a ``Date`` column (used as the index) and, for
the modelling pages, ``Open``, ``High``, ``Low``, ``Volume`` and
``Adj Close`` columns.

Pages (chosen in the sidebar):

* Home          - upload prompt with a Lottie animation.
* Visualization - dataset overview, basic quality checks and EDA plots.
* Models        - Linear Regression, ARIMA, LSTM and a comparison tab.
* Forecasting   - 30-day recursive forecast with a small LSTM.
"""

# Several names below are not referenced by the script; they are kept so the
# module's import surface matches the original upload.
import math
import random

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# The original aliased this module as ``mean_squared_error``, which the sklearn
# import below immediately overwrote; ``px`` avoids the name collision.
import plotly.express as px
import requests
import seaborn as sns
import streamlit as st
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    mean_squared_log_error,
    r2_score,
)
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA
from streamlit_lottie import st_lottie
from streamlit_lottie import st_lottie_spinner
from streamlit_option_menu import option_menu

sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")

FEATURES = ['Open', 'High', 'Low', 'Volume']
TARGET = 'Adj Close'
TRAIN_FRACTION = 0.8
N_SPLITS = 10
FORECAST_DAYS = 30
REQUEST_TIMEOUT_S = 10

LOTTIE_UPLOADED_URL = "https://lottie.host/c65c0bf7-7e88-47f9-a988-2a5f70a06aca/fZvqGW9tEi.json"
LOTTIE_WAITING_URL = "https://lottie.host/f972bd19-053a-4132-8060-82bb4f23a5e4/UJ5UiaDEtQ.json"

# NOTE(review): the original wrote a CSS snippet before each metrics table, but
# its markup is missing from the copy this file was recovered from; only
# whitespace survived. Restore the <style> block here if it is still wanted.
TABLE_STYLE = "\n\n"


def load_lottieurl(url: str):
    """Download a Lottie animation and return it as parsed JSON.

    Returns ``None`` when the request fails, the server does not answer with
    HTTP 200, or the body is not valid JSON.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        return None


def show_animation(url, *, width, height):
    """Render the Lottie animation at *url*, skipping it if it cannot be loaded."""
    animation = load_lottieurl(url)
    if animation is not None:
        st_lottie(animation, width=width, height=height)


def show_heading(text):
    """Render *text* as a section heading.

    NOTE(review): the original calls wrapped the text in HTML that is missing
    from the copy this file was recovered from; only the text is reproduced.
    """
    st.markdown(f"\n\n{text}\n\n", unsafe_allow_html=True)


def show_figure(fig):
    """Render a Matplotlib figure and close it so reruns do not accumulate figures."""
    st.pyplot(fig)
    plt.close(fig)


def read_stock_csv(source):
    """Parse an uploaded CSV with ``Date`` as a parsed datetime index.

    Raises:
        ValueError: if the file is empty, malformed or has no ``Date`` column
            (pandas' parser errors are ``ValueError`` subclasses).
    """
    return pd.read_csv(source, na_values=['null'], index_col='Date', parse_dates=True)


def require_dataset(required_columns=()):
    """Show the file uploader and return the parsed dataset.

    Halts the current Streamlit run (``st.stop()``) with a message when no
    file has been uploaded yet, the file cannot be parsed, or any of
    *required_columns* is missing.
    """
    uploaded_file = st.file_uploader("Upload a CSV file: ")
    if uploaded_file is None:
        st.info("Upload a CSV file to continue.")
        st.stop()
    try:
        df = read_stock_csv(uploaded_file)
    except ValueError as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()
    missing = [column for column in required_columns if column not in df.columns]
    if missing:
        st.error(f"The uploaded CSV is missing required columns: {', '.join(missing)}")
        st.stop()
    return df


def require_model_dataset():
    """Return the uploaded dataset restricted to rows usable for modelling."""
    df = require_dataset(FEATURES + [TARGET])
    # The estimators cannot handle NaN ('null' cells are parsed as NaN).
    df = df.dropna(subset=FEATURES + [TARGET])
    # TimeSeriesSplit needs more samples than splits.
    if len(df) <= N_SPLITS:
        st.error(f"At least {N_SPLITS + 1} complete rows are required for modelling.")
        st.stop()
    return df


def regression_metrics(y_true, y_pred):
    """Return R2, MSE, MAE and RMSE of *y_pred* against *y_true* as a dict."""
    mse = mean_squared_error(y_true, y_pred)
    return {
        'r2': r2_score(y_true, y_pred),
        'mse': mse,
        'mae': mean_absolute_error(y_true, y_pred),
        'rmse': math.sqrt(mse),
    }


def show_metrics_table(r2, mse, mae, rmse):
    """Render the evaluation metrics as a two-column table."""
    show_heading("Evaluation Metrics:")
    table = pd.DataFrame({
        'Metric': ['R2 Score', 'MSE', 'MAE', 'RMSE'],
        'Value': [r2, mse, mae, rmse],
    })
    st.write(TABLE_STYLE, unsafe_allow_html=True)
    st.table(table)


def scale_features_and_target(df):
    """Min-max scale the feature columns and the target column.

    Returns ``(features, target, target_scaler)``; the target scaler is
    returned so predictions can be mapped back to prices. Separate scalers
    are used so the inverse transform never depends on fitting order.

    NOTE(review): as in the original, scaling is fitted on the full dataset,
    so the held-out rows influence the scaling of the training rows.
    """
    features = MinMaxScaler().fit_transform(df[FEATURES])
    target_scaler = MinMaxScaler()
    target = target_scaler.fit_transform(df[[TARGET]])
    return features, target, target_scaler


def last_time_series_split(features, target):
    """Return ``(X_train, X_test, y_train, y_test)`` for the final TimeSeriesSplit fold.

    The feature matrices are reshaped to ``(rows, 1, n_features)``, i.e. one
    time step per sample, which is the input shape the LSTM is built with.
    """
    *_, (train_idx, test_idx) = TimeSeriesSplit(n_splits=N_SPLITS).split(features)
    x_train, x_test = features[train_idx], features[test_idx]
    x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
    x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[1])
    return x_train, x_test, target[train_idx], target[test_idx]


def build_lstm(n_features):
    """Build and compile the single-layer LSTM regressor used by both LSTM pages."""
    model = Sequential()
    model.add(LSTM(32, input_shape=(1, n_features), activation='relu', return_sequences=False))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


def fit_lstm(model, x_train, y_train, *, epochs):
    """Train *model* with early stopping on the training loss."""
    callbacks = [EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)]
    return model.fit(x_train, y_train, epochs=epochs, batch_size=32, verbose=1,
                     shuffle=True, callbacks=callbacks)


def get_model_summary(model):
    """Return Keras' textual model summary as a single string."""
    lines = []
    # Extra keyword arguments are tolerated because some Keras versions pass
    # formatting options to print_fn.
    model.summary(print_fn=lambda line, **_: lines.append(line))
    return "\n".join(lines)


def render_home():
    """Home page: title, uploader and a status animation."""
    show_heading("Stock Price Prediction")
    uploaded_file = st.file_uploader("Upload a CSV file: ")
    if uploaded_file is None:
        show_animation(LOTTIE_WAITING_URL, width=1000, height=400)
        return
    try:
        read_stock_csv(uploaded_file)
    except ValueError as exc:
        # Previously a bare ``except`` hid the reason; report it, then fall
        # back to the same animation as before.
        st.error(f"Could not read the uploaded CSV: {exc}")
        show_animation(LOTTIE_WAITING_URL, width=1000, height=400)
        return
    show_animation(LOTTIE_UPLOADED_URL, width=400, height=200)


def _plot_columns(df, columns, *, legend=None, ylabel=''):
    """Line-plot *columns* of *df* against the index on a fresh wide figure."""
    fig, ax = plt.subplots(figsize=(15, 6))
    for column in columns:
        df[column].plot(ax=ax)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('')
    if legend is not None:
        ax.legend(legend)
    fig.tight_layout()
    return fig


def render_visualization():
    """Visualization page: dataset overview, quality checks and EDA plots."""
    df = require_dataset(FEATURES + [TARGET, 'Close'])

    show_heading("Top 5 records of the Dataset:")
    st.write(df.head())
    show_heading("Bottom 5 records of the Dataset:")
    st.write(df.tail())
    show_heading("Sample records of the Dataset:")
    # sample() raises when asked for more rows than exist.
    st.write(df.sample(min(25, len(df))))
    show_heading("Size of the Dataset:")
    st.write('Row Size:', df.shape[0])
    st.write('Column Size:', df.shape[1])
    show_heading("Columns are:")
    st.write(df.columns)
    show_heading("Description related to Dataset are:")
    st.write(df.describe())

    show_heading("Data Preprocessing")
    show_heading("Null Values in the Dataset:")
    st.write(df.isnull().sum())
    show_heading("Duplicate Records in the Dataset:")
    st.write(df.duplicated().sum())
    show_heading("Unique Values in the Dataset:")
    st.write(df.nunique())

    show_heading("Exploratory Data Analysis")
    # Restrict to numeric columns: corr() raises on non-numeric data in pandas 2.x.
    corr_matrix = df.select_dtypes(include='number').corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5, ax=ax)
    show_heading("Correlation Heatmap:")
    show_figure(fig)

    show_heading("High & Low Price:")
    show_figure(_plot_columns(df, ['High', 'Low'], legend=['High Price', 'Low Price']))

    show_heading("Opening & Closing Price:")
    show_figure(_plot_columns(df, ['Open', 'Close'], legend=['Open Price', 'Close Price']))

    show_heading("Sales Volume of Tesla:")
    show_figure(_plot_columns(df, ['Volume'], ylabel='Volume'))

    fig, ax = plt.subplots(figsize=(15, 6))
    df[TARGET].pct_change().hist(bins=50, ax=ax)
    ax.set_ylabel('Daily Return')
    show_heading("Tesla Daily Return:")
    fig.tight_layout()
    show_figure(fig)

    pairplot = sns.pairplot(df[FEATURES])
    show_heading("Features Visualization:")
    show_figure(pairplot.fig)


def render_linear_regression(df):
    """Fit a linear regression on the first 80% of rows and evaluate on the rest."""
    split_index = int(TRAIN_FRACTION * len(df))
    x_train, x_test = df[FEATURES].iloc[:split_index], df[FEATURES].iloc[split_index:]
    y_train, y_test = df[TARGET].iloc[:split_index], df[TARGET].iloc[split_index:]

    model = LinearRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    show_heading("Linear Regression")
    # Report the measured scores; the original substituted a random R2 value.
    show_metrics_table(**regression_metrics(y_test, y_pred))

    show_heading("Actual vs. Predicted Stock Price:")
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(y_test.index, y_test, label='Actual', color='blue')
    ax.plot(y_test.index, y_pred, label='Predicted', color='red')
    ax.set_xlabel('Date')
    ax.set_ylabel('Adj Close Price')
    ax.grid(True)
    ax.legend()
    show_figure(fig)


def render_arima(df):
    """Fit ARIMA(5, 1, 0) on the first 80% of rows and forecast the remainder."""
    split_index = int(TRAIN_FRACTION * len(df))
    df_train, df_test = df.iloc[:split_index], df.iloc[split_index:]

    model_fit = ARIMA(df_train[TARGET], order=(5, 1, 0)).fit()
    forecast = model_fit.forecast(steps=len(df_test))
    # Use the raw values: the forecast carries its own index and name, so
    # wrapping it with a different index/column realigns everything to NaN.
    predicted = pd.Series(np.asarray(forecast), index=df_test.index, name='Predicted')

    # Report the measured scores; the original displayed random numbers.
    show_metrics_table(**regression_metrics(df_test[TARGET], predicted))

    show_heading("ARIMA Predictions")
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df_train.index, df_train[TARGET], label='Training Data', color='blue')
    ax.plot(df_test.index, df_test[TARGET], label='Actual Test Data', color='green')
    ax.plot(predicted.index, predicted, label='Predicted Test Data', color='red')
    ax.set_xlabel('Date')
    ax.set_ylabel('Adj Close Price')
    ax.grid(True)
    ax.legend()
    show_figure(fig)


def render_lstm(df):
    """Train the LSTM on the last TimeSeriesSplit fold and evaluate on its test part."""
    show_heading("LSTM")
    features, target, target_scaler = scale_features_and_target(df)
    x_train, x_test, y_train, y_test = last_time_series_split(features, target)

    lstm = build_lstm(x_train.shape[2])
    show_heading("Model Summary")
    st.text(get_model_summary(lstm))

    fit_lstm(lstm, x_train, y_train, epochs=50)
    y_pred = target_scaler.inverse_transform(lstm.predict(x_test))
    y_true = target_scaler.inverse_transform(y_test)

    # Report the measured scores; the original displayed random R2/MSE/RMSE.
    show_metrics_table(**regression_metrics(y_true, y_pred))

    show_heading("Predictions by LSTM")
    fig, ax = plt.subplots()
    ax.plot(y_true, label='True Value')
    ax.plot(y_pred, label='LSTM Value')
    ax.set_xlabel('Time Scale')
    ax.set_ylabel('USD')
    ax.legend()
    show_figure(fig)


def render_comparison():
    """Show the model comparison charts."""
    model_names = ['Linear Regression', 'ARIMA', 'LSTM']
    # NOTE(review): these figures are hard-coded, not computed from the
    # uploaded data; replace them with measured scores before relying on them.
    accuracies = [0.94, 0.98, 0.99]

    show_heading("Models Piechart Accuracy Comparison")
    fig, ax = plt.subplots()
    ax.pie(accuracies, labels=model_names, startangle=90, colors=['blue', 'green', 'red'])
    ax.axis('equal')
    show_figure(fig)

    fig, ax = plt.subplots()
    ax.plot(model_names, accuracies, marker='o', label='Accuracy', color='green', linestyle='-')
    ax.set_xlabel('Models')
    ax.set_ylabel('Accuracy')
    show_heading("Models Graph Accuracy Comparison")
    ax.set_ylim(0.6, 1.5)
    ax.legend()
    show_figure(fig)


def render_models():
    """Models page: pick a model tab and render it for the uploaded dataset."""
    df = require_model_dataset()
    selected1 = option_menu(
        "", ["Linear Regression", "ARIMA", 'LSTM', 'Comparision'],
        icons=['clipboard', 'diagram-3-fill', 'file-earmark-image'],
        default_index=0, orientation="horizontal",
        styles={
            "container": {"padding": "0!important", "background-color": "white"},
            "icon": {"color": "DarkMagenta", "font-size": "15px"},
            "nav-link": {"font-size": "15px", "text-align": "left", "margin": "0px",
                         "--hover-color": "#eee"},
            "nav-link-selected": {"background-color": "green"},
        })
    if selected1 == 'Linear Regression':
        render_linear_regression(df)
    elif selected1 == 'ARIMA':
        render_arima(df)
    elif selected1 == 'LSTM':
        render_lstm(df)
    elif selected1 == 'Comparision':
        render_comparison()


def render_forecasting():
    """Forecasting page: train the LSTM and roll it forward for 30 days."""
    show_heading("Forecasting the stock price")
    df = require_model_dataset()

    features, target, target_scaler = scale_features_and_target(df)
    x_train, _x_test, y_train, _y_test = last_time_series_split(features, target)
    lstm = build_lstm(x_train.shape[2])
    fit_lstm(lstm, x_train, y_train, epochs=25)

    # NOTE(review): as in the original, each predicted (scaled) price is pushed
    # into the feature window in place of the oldest feature value, although
    # the window holds Open/High/Low/Volume rather than past prices - confirm
    # this is the intended forecasting scheme.
    forecast_data = features[-1].reshape(1, 1, len(FEATURES))
    scaled_forecast = []
    for _ in range(FORECAST_DAYS):
        next_value = lstm.predict(forecast_data)
        scaled_forecast.append(next_value)
        forecast_data = np.append(forecast_data[:, 0, 1:], next_value).reshape(1, 1, len(FEATURES))
    forecast_values = target_scaler.inverse_transform(np.array(scaled_forecast).reshape(-1, 1))

    show_heading("Stock price Forecast for the Next 30 Days")
    try:
        # Start the day after the last observation so the forecast does not
        # overlap the final historical point.
        first_day = df.index[-1] + pd.Timedelta(days=1)
        date_range = pd.date_range(start=first_day, periods=FORECAST_DAYS, freq='D')
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(df.index, df[TARGET], label='Historical Data', linewidth=2)
        ax.plot(date_range, forecast_values, label='Forecasted Data', linestyle='--',
                marker='o', markersize=5)
        ax.set_xlabel('Date')
        ax.set_ylabel('USD')
        ax.legend()
        show_figure(fig)
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")


PAGES = {
    'Home': render_home,
    'Visualization': render_visualization,
    'Models': render_models,
    'Forecasting': render_forecasting,
}


def main():
    """Configure the page, draw the sidebar menu and render the selected page."""
    st.set_page_config(page_title='Stock Analysis',
                       layout='wide', page_icon=":mag_right:")
    with st.sidebar:
        selected = option_menu(
            "DashBoard", ["Home", 'Visualization', 'Models', 'Forecasting'],
            icons=['house', 'graph-down', 'box-fill', 'diagram-2'],
            menu_icon="cast", default_index=0,
            styles={
                "nav-link-selected": {"background-color": "green"},
            })
    render_page = PAGES.get(selected)
    if render_page is not None:
        render_page()


if __name__ == "__main__":
    main()