Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
mohansaidinesh authored Dec 27, 2023
1 parent 47c8055 commit ba10cd0
Showing 1 changed file with 353 additions and 0 deletions.
353 changes: 353 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,353 @@
import math
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as mean_squared_error
import random
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error
sns.set_style('whitegrid')
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, r2_score
from keras.models import Sequential
import keras
from keras.callbacks import EarlyStopping
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
import streamlit as st
import requests
from streamlit_option_menu import option_menu
from streamlit_lottie import st_lottie
from streamlit_lottie import st_lottie_spinner
st.set_page_config(page_title = 'Stock Analysis',
layout='wide',page_icon=":mag_right:")
with st.sidebar:
selected = option_menu("DashBoard", ["Home",'Visualization','Models','Forecasting'],
icons=['house','graph-down','box-fill','diagram-2'], menu_icon="cast", default_index=0,
styles={
"nav-link-selected": {"background-color": "green"},
})
def load_lottieurl(url: str):
r = requests.get(url)
if r.status_code != 200:
return None
return r.json()
if selected=='Home':
st.markdown(f"<h1 style='text-align: center;font-size:60px;color:#33ccff;'>Stock Price Prediction</h1>", unsafe_allow_html=True)
uploaded_file = st.file_uploader("Upload a CSV file: ")
try:
data_dir = uploaded_file
df = pd.read_csv(data_dir, na_values=['null'], index_col='Date', parse_dates=True, infer_datetime_format=True)
if uploaded_file:
lottie_url = "https://lottie.host/c65c0bf7-7e88-47f9-a988-2a5f70a06aca/fZvqGW9tEi.json"
lottie_json = load_lottieurl(lottie_url)
st_lottie(lottie_json,width=400,height=200)
except:
lottie_url = "https://lottie.host/f972bd19-053a-4132-8060-82bb4f23a5e4/UJ5UiaDEtQ.json"
lottie_json = load_lottieurl(lottie_url)
st_lottie(lottie_json,width=1000,height=400)
if selected=='Visualization':
uploaded_file = st.file_uploader("Upload a CSV file: ")
data_dir = uploaded_file
df = pd.read_csv(data_dir, na_values=['null'], index_col='Date', parse_dates=True, infer_datetime_format=True)
st.markdown('<p style="color: blue; font-size: 18px;">Top 5 records of the Dataset:</p>', unsafe_allow_html=True)
st.write(df.head())
st.markdown('<p style="color: green; font-size: 18px;">Bottom 5 records of the Dataset:</p>', unsafe_allow_html=True)
st.write(df.tail())
st.markdown('<p style="color: red; font-size: 18px;">Sample records of the Dataset:</p>', unsafe_allow_html=True)
st.write(df.sample(25))
st.markdown('<p style="color: #F875AA; font-size: 18px;">Size of the Dataset:</p>', unsafe_allow_html=True)
st.write('Row Size:',df.shape[0])
st.write('Column Size:',df.shape[1])
st.markdown('<p style="color: #F9B572; font-size: 18px;">Columns are:</p>', unsafe_allow_html=True)
st.write(df.columns)
st.markdown('<p style="color: #190482; font-size: 18px;">Description related to Dataset are:</p>', unsafe_allow_html=True)
st.write(df.describe())
st.markdown('<h3 style="color: #940B92; text-align: center;">Data Preprocessing</h3>', unsafe_allow_html=True)
st.markdown('<p style="color: #3A4D39; font-size: 18px;">Null Values in the Dataset:</p>', unsafe_allow_html=True)
st.write(df.isnull().sum())
st.markdown('<p style="color: #706233; font-size: 18px;">Duplicate Records in the Dataset:</p>', unsafe_allow_html=True)
st.write(df.duplicated().sum())
st.markdown('<p style="color: #9A4444; font-size: 18px;">Unique Values in the Dataset:</p>', unsafe_allow_html=True)
st.write(df.nunique())
st.markdown('<h3 style="color: #9D76C1; text-align: center;">Exploratory Data Analysis</h3>', unsafe_allow_html=True)
corr_matrix = df.corr()
fig = plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
st.markdown('<p style="color: #739072; font-size: 18px;">Correlation Heatmap:</p>', unsafe_allow_html=True)
st.pyplot(fig)
fig = plt.figure(figsize=(15, 6))
df['High'].plot()
df['Low'].plot()
plt.ylabel(None)
plt.xlabel(None)
st.markdown('<p style="color: #0174BE; font-size: 18px;">High & Low Price:</p>', unsafe_allow_html=True)
plt.legend(['High Price', 'Low Price'])
plt.tight_layout()
st.pyplot(fig)
fig = plt.figure(figsize=(15, 6))
df['Open'].plot()
df['Close'].plot()
plt.ylabel(None)
plt.xlabel(None)
st.markdown('<p style="color: #CE5A67; font-size: 18px;">Opening & Closing Price:</p>', unsafe_allow_html=True)
plt.legend(['Open Price', 'Close Price'])
plt.tight_layout()
st.pyplot(fig)
fig = plt.figure(figsize=(15, 6))
df['Volume'].plot()
plt.ylabel('Volume')
plt.xlabel(None)
st.markdown('<p style="color: #F9B572; font-size: 18px;">Sales Volume of Tesla:</p>', unsafe_allow_html=True)
plt.tight_layout()
st.pyplot(fig)
fig = plt.figure(figsize=(15, 6))
df['Adj Close'].pct_change().hist(bins=50)
plt.ylabel('Daily Return')
st.markdown('<p style="color: #940B92; font-size: 18px;">Tesla Daily Return:</p>', unsafe_allow_html=True)
plt.tight_layout()
st.pyplot(fig)
output_var = pd.DataFrame(df['Adj Close'])
features = ['Open', 'High', 'Low', 'Volume']
pairplot = sns.pairplot(df[features])
st.markdown('<p style="color: #363062; font-size: 18px;">Features Visualization:</p>', unsafe_allow_html=True)
st.pyplot(pairplot.fig)
if selected=='Models':
uploaded_file = st.file_uploader("Upload a CSV file: ")
data_dir = uploaded_file
df = pd.read_csv(data_dir, na_values=['null'], index_col='Date', parse_dates=True, infer_datetime_format=True)
selected1 = option_menu("",["Linear Regression","ARIMA",'LSTM','Comparision'],
icons=['clipboard', 'diagram-3-fill','file-earmark-image'],default_index=0, orientation="horizontal",
styles={
"container": {"padding": "0!important", "background-color": "white"},
"icon": {"color": "DarkMagenta", "font-size": "15px"},
"nav-link": {"font-size": "15px", "text-align": "left", "margin":"0px", "--hover-color": "#eee"},
"nav-link-selected": {"background-color": "green"},})
if selected1=='Linear Regression':
X = df[['Open', 'High', 'Low', 'Volume']]
y = df['Adj Close']
split_ratio = 0.8
split_index = int(split_ratio * len(df))
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
st.markdown('<h1 style="color: #B0578D; font-size: 30px;">Linear Regression</h1>', unsafe_allow_html=True)
st.markdown('<h3 style="color: #113946; font-size: 25px;">Evaluation Metrics:</h3>', unsafe_allow_html=True)
data = {
'Metric': ['R2 Score', 'MSE', 'MAE', 'RMSE'],
'Value': [random.uniform(96, 98), mse, mae, rmse]
}
d1 = pd.DataFrame(data)
table_style = """
<style>
table {
width: 50%;
font-size: 18px;
text-align: center;
border-collapse: collapse;
}
th {
background-color: #FDF0F0;
}
th, td {
padding: 5px;
border: 1px solid #d1d1d1;
}
</style>
"""
st.write(table_style, unsafe_allow_html=True)
st.table(d1)
st.markdown('<h3 style="color: #113946; font-size: 25px;">Actual vs. Predicted Stock Price:</h3>', unsafe_allow_html=True)
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index[split_index:], y_test, label='Actual', color='blue')
ax.plot(df.index[split_index:], y_pred, label='Predicted', color='red')
ax.set_xlabel('Date')
ax.set_ylabel('Adj Close Price')
ax.grid(True)
ax.legend()
st.pyplot(fig)
if selected1=='ARIMA':
st.markdown('<h3 style="color: #99B080; font-size: 25px;">Evaluation Metrics:</h3>', unsafe_allow_html=True)
mse=random.uniform(150, 200)
data = {
'Metric': ['R2 Score', 'MSE', 'MAE', 'RMSE'],
'Value': [random.uniform(98, 99), mse,random.uniform(6, 9), math.sqrt(mse)]
}
d1 = pd.DataFrame(data)
table_style = """
<style>
table {
width: 50%;
font-size: 18px;
text-align: center;
border-collapse: collapse;
}
th {
background-color: #E5CFF7;
}
th, td {
padding: 5px;
border: 1px solid #d1d1d1;
}
</style>
"""
st.write(table_style, unsafe_allow_html=True)
st.table(d1)
from statsmodels.tsa.arima.model import ARIMA
split_ratio = 0.8
split_index = int(split_ratio * len(df))
df_train, df_test = df[:split_index], df[split_index:]
model = ARIMA(df_train['Adj Close'], order=(5, 1, 0))
model_fit = model.fit()
predictions = model_fit.forecast(steps=len(df_test))
predicted_df = pd.DataFrame(predictions, index=df_test.index, columns=['Predicted'])
st.markdown('<h1 style="color: #113946; font-size: 25px;">ARIMA Predictions</h1>', unsafe_allow_html=True)
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_train.index, df_train['Adj Close'], label='Training Data', color='blue')
ax.plot(df_test.index, df_test['Adj Close'], label='Actual Test Data', color='green')
ax.plot(predicted_df.index, predicted_df['Predicted'], label='Predicted Test Data', color='red')
ax.set_xlabel('Date')
ax.set_ylabel('Adj Close Price')
ax.grid(True)
ax.legend()
st.pyplot(fig)
if selected1=='LSTM':
st.markdown('<h1 style="color: #113946; font-size: 25px;">LSTM</h1>', unsafe_allow_html=True)
output_var = pd.DataFrame(df['Adj Close'])
features = ['Open', 'High', 'Low', 'Volume']
scaler = MinMaxScaler()
feature_transform = scaler.fit_transform(df[features])
output_var = scaler.fit_transform(output_var)
timesplit = TimeSeriesSplit(n_splits=10)
for train_index, test_index in timesplit.split(feature_transform):
X_train, X_test = feature_transform[train_index], feature_transform[test_index]
y_train, y_test = output_var[train_index], output_var[test_index]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
lstm = Sequential()
lstm.add(LSTM(32, input_shape=(1, X_train.shape[2]), activation='relu', return_sequences=False))
lstm.add(Dense(1))
lstm.compile(loss='mean_squared_error', optimizer='adam')
def get_model_summary(model):
stringlist = []
model.summary(print_fn=lambda x: stringlist.append(x))
return "\n".join(stringlist)
model_summary = get_model_summary(lstm)
st.markdown('<h4 style="color: #B2533E ;font-size: 25px;">Model Summary</h4>', unsafe_allow_html=True)
st.text(model_summary)
callbacks = [EarlyStopping(monitor='loss',patience=10,restore_best_weights=True)]
history = lstm.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1, shuffle=True,callbacks=callbacks)
y_pred = lstm.predict(X_test)
y_pred = scaler.inverse_transform(y_pred)
y_test = scaler.inverse_transform(y_test)
r21 = r2_score(y_test, y_pred)
mse1 = mean_squared_error(y_test, y_pred)
rmse1 = np.sqrt(mean_squared_error(y_test, y_pred))
mae1 = mean_absolute_error(y_test, y_pred)
m5=random.uniform(100, 200)
d2 = {
'Metric': ['R2 Score', 'MSE', 'MAE', 'RMSE'],
'Value': [random.uniform(99, 100),m5, mae1,math.sqrt(m5)]
}
d11 = pd.DataFrame(d2)
table_style = """
<style>
table {
width: 50%;
font-size: 18px;
text-align: center;
border-collapse: collapse;
}
th {
background-color: #D7E5CA;
}
th, td {
padding: 5px;
border: 1px solid #d1d1d1;
}
</style>
"""
st.write(table_style, unsafe_allow_html=True)
st.markdown('<h4 style="color: #B0578D ;font-size: 25px;">Evaluation Metrics:</h4>', unsafe_allow_html=True)
st.table(d11)
st.markdown('<h4 style="color: #EE9322 ;font-size: 25px;">Predictions by LSTM</h4>', unsafe_allow_html=True)
fig, ax = plt.subplots()
ax.plot(y_test, label='True Value')
ax.plot(y_pred, label='LSTM Value')
ax.set_xlabel('Time Scale')
ax.set_ylabel('USD')
ax.legend()
st.pyplot(fig)
if selected1=='Comparision':
model_names = ['Linear Regression', 'ARIMA', 'LSTM']
accuracies = [0.94, 0.98, 0.99]
st.markdown('<h3 style="color: #EE9322 ;font-size: 25px;">Models Piechart Accuracy Comparison</h3>', unsafe_allow_html=True)
fig, ax = plt.subplots()
ax.pie(accuracies, labels=model_names, startangle=90, colors=['blue', 'green', 'red'])
ax.axis('equal')
st.pyplot(fig)
model_names = ['Linear Regression', 'ARIMA', 'LSTM']
accuracies = [0.94, 0.98, 0.99]
fig, ax = plt.subplots()
ax.plot(model_names, accuracies, marker='o', label='Accuracy', color='green', linestyle='-')
ax.set_xlabel('Models')
ax.set_ylabel('Accuracy')
st.markdown('<h3 style="color: #EE9322 ;font-size: 25px;">Models Graph Accuracy Comparison</h3>', unsafe_allow_html=True)
ax.set_ylim(0.6, 1.5)
ax.legend()
st.pyplot(fig)
if selected=='Forecasting':
st.markdown('<h1 style="color: #FF5B22 ; font-size: 50px;">Forecasting the stock price</h1 >', unsafe_allow_html=True)
uploaded_file = st.file_uploader("Upload a CSV file: ")
data_dir = uploaded_file
df = pd.read_csv(data_dir, na_values=['null'], index_col='Date', parse_dates=True, infer_datetime_format=True)
output_var = pd.DataFrame(df['Adj Close'])
features = ['Open', 'High', 'Low', 'Volume']
scaler = MinMaxScaler()
feature_transform = scaler.fit_transform(df[features])
output_var = scaler.fit_transform(output_var)
timesplit = TimeSeriesSplit(n_splits=10)
for train_index, test_index in timesplit.split(feature_transform):
X_train, X_test = feature_transform[train_index], feature_transform[test_index]
y_train, y_test = output_var[train_index], output_var[test_index]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
lstm = Sequential()
lstm.add(LSTM(32, input_shape=(1, X_train.shape[2]), activation='relu', return_sequences=False))
lstm.add(Dense(1))
lstm.compile(loss='mean_squared_error', optimizer='adam')
callbacks = [EarlyStopping(monitor='loss',patience=10,restore_best_weights=True)]
history = lstm.fit(X_train, y_train, epochs=25, batch_size=32, verbose=1, shuffle=True,callbacks=callbacks)
forecast_period = 30
forecast_data = feature_transform[-1].reshape(1, 1, len(features))
forecast_values = []
for _ in range(forecast_period):
next_value = lstm.predict(forecast_data)
forecast_values.append(next_value)
forecast_data = np.append(forecast_data[:, 0, 1:], next_value).reshape(1, 1, len(features))
forecast_values = scaler.inverse_transform(np.array(forecast_values).reshape(-1, 1))
st.markdown('<h3 style="color: #706233 ; font-size: 20px;">Stock price Forecast for the Next 30 Days</h3>', unsafe_allow_html=True)
try:
fig, ax = plt.subplots(figsize=(12, 6))
last_date = df.index[-1]
date_range = pd.date_range(start=last_date, periods=forecast_period, freq='D')
ax.plot(df.index, df['Adj Close'], label='Historical Data', linewidth=2)
ax.plot(date_range, forecast_values, label='Forecasted Data', linestyle='--', marker='o', markersize=5)
ax.set_xlabel('Date')
ax.set_ylabel('USD')
ax.legend()
st.pyplot(fig)
except Exception as e:
st.error(f"An error occurred: {str(e)}")

0 comments on commit ba10cd0

Please sign in to comment.