Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

building a website for stock price prediction #172

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions stock_price_prediction_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
import matplotlib.pyplot as plt
import streamlit as st
from sklearn.impute import SimpleImputer

# Load and preprocess the dataset
@st.cache_data
def load_data():
df = pd.read_csv('SBIN.csv')
df.drop(['Date', 'Adj Close'], axis=1, inplace=True) # Drop irrelevant columns

# Handle missing values
imputer = SimpleImputer(strategy='mean')
df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

# Calculate VWAP
df['VWAP'] = (df['Close'] * df['Volume']).cumsum() / df['Volume'].cumsum()

return df

# Train the models
def train_models(df):
# Select features and target variable
X = df[['Open', 'High', 'Low', 'Volume', 'VWAP']]
y = df['Close']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

models = {
'Linear Regression': LinearRegression(),
'SVR': SVR(),
'Random Forest': RandomForestRegressor(),
'Gradient Boosting': GradientBoostingRegressor(),
'XGBoost': xgb.XGBRegressor(),
'AdaBoost': AdaBoostRegressor(),
'Decision Tree': DecisionTreeRegressor(),
'KNeighbors': KNeighborsRegressor()
}

trained_models = {}

for name, model in models.items():
model.fit(X_train_scaled, y_train)
trained_models[name] = model

return trained_models, scaler, X_test_scaled, y_test

# Load data and train models
df = load_data()
trained_models, scaler, X_test_scaled, y_test = train_models(df)

# Streamlit app layout
st.title("Stock Price Prediction App")

# User input for stock features
open_price = st.number_input("Open Price:", min_value=0.0, format="%.2f")
high_price = st.number_input("High Price:", min_value=0.0, format="%.2f")
low_price = st.number_input("Low Price:", min_value=0.0, format="%.2f")
volume = st.number_input("Volume:", min_value=0, format="%d")

# Calculate VWAP for user input
vwap = (open_price + high_price + low_price) / 3 # Simplified VWAP calculation

# Dropdown for model selection
model_selection = st.selectbox("Select Model for Prediction:", options=list(trained_models.keys()))

# Button to predict
if st.button("Predict Closing Price"):
input_data = np.array([[open_price, high_price, low_price, volume, vwap]])
input_scaled = scaler.transform(input_data)

# Make prediction using the selected model
model = trained_models[model_selection]
prediction = model.predict(input_scaled)

# Display the prediction
st.success(f"The predicted closing price is: ${prediction[0]:.2f}")

# Optional: Visualize KNN Hyperparameter Tuning
if st.checkbox("Show KNN Hyperparameter Tuning Plot"):
k_values = range(1, 31) # Example range for k
mse_values = []

X = df[['Open', 'High', 'Low', 'Volume', 'VWAP']]
y = df['Close']

for k in k_values:
knn_model = KNeighborsRegressor(n_neighbors=k)
mse = -cross_val_score(knn_model, scaler.transform(X), y, cv=5, scoring='neg_mean_squared_error').mean()
mse_values.append(mse)

plt.figure(figsize=(10, 6))
plt.plot(k_values, mse_values, marker='o', linestyle='-')
plt.title('KNN Hyperparameter Tuning: MSE vs. Number of Neighbors')
plt.xlabel('Number of Neighbors (k)')
plt.ylabel('Mean Squared Error (MSE)')
plt.xticks(k_values) # Show all k values on x-axis
plt.grid()

# Show the plot in Streamlit
st.pyplot(plt)
Loading