-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreamlit_app.py
92 lines (72 loc) · 3.03 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import streamlit as st
import pickle
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.preprocessing import MinMaxScaler
# Load the model and features
def load_model_and_features(model_path, features_path):
    """Unpickle the model and the selected-feature object from disk.

    Args:
        model_path: Path of the pickle file holding the model.
        features_path: Path of the pickle file holding the selected features.

    Returns:
        A ``(model, selected_features)`` tuple with the two unpickled objects,
        in that order.
    """
    unpickled = []
    # The model file is read first, then the feature file; each handle is
    # closed before the next one is opened.
    for pickle_path in (model_path, features_path):
        with open(pickle_path, 'rb') as handle:
            unpickled.append(pickle.load(handle))
    model, selected_features = unpickled
    return model, selected_features
# Enhanced feature engineering
def create_features(df):
    """Return a copy of ``df`` extended with calendar features from its index.

    The index must expose datetime accessors (``hour``, ``day``, ``month``,
    ``dayofweek``, ``quarter``, ``is_month_start``, ``is_month_end``). The
    input frame itself is not modified.

    Added columns, in order: ``hour``, ``day``, ``month``, ``dayofweek``,
    ``quarter``, ``is_weekend``, ``is_month_start``, ``is_month_end``,
    ``hour_sin``, ``hour_cos``, ``month_sin``, ``month_cos``.
    """
    enriched = df.copy()
    stamps = enriched.index

    # Plain calendar components copied straight from the index.
    for part in ('hour', 'day', 'month', 'dayofweek', 'quarter'):
        enriched[part] = getattr(stamps, part)

    # 0/1 indicator columns; days 5 and 6 are treated as the weekend.
    enriched['is_weekend'] = stamps.dayofweek.isin([5, 6]).astype(int)
    for flag in ('is_month_start', 'is_month_end'):
        enriched[flag] = getattr(stamps, flag).astype(int)

    # Cyclical encoding: map hour (period 24) and month (period 12) onto the
    # unit circle so that the ends of each cycle sit next to each other.
    hour_angle = 2 * np.pi * enriched['hour'] / 24
    enriched['hour_sin'] = np.sin(hour_angle)
    enriched['hour_cos'] = np.cos(hour_angle)

    month_angle = 2 * np.pi * enriched['month'] / 12
    enriched['month_sin'] = np.sin(month_angle)
    enriched['month_cos'] = np.cos(month_angle)

    return enriched
def preprocess_data(df, selected_features, scaler=None):
    """Build the scaled feature matrix for a raw input frame.

    Steps: derive the calendar features via ``create_features``, drop every
    row that contains a NaN, keep only the ``selected_features`` columns and
    scale them.

    Args:
        df: Input frame; passed to ``create_features``.
        selected_features: Column labels to keep as model inputs.
        scaler: Optional already-fitted scaler. When ``None``, a new
            ``MinMaxScaler`` is created and fitted on this very data.

    Returns:
        A ``(X_scaled, scaler)`` tuple: the scaled features as a DataFrame
        with the same columns and index as the selected rows, and the scaler
        that was used (the newly fitted one when none was supplied).
    """
    complete_rows = create_features(df).dropna()
    features = complete_rows[selected_features]

    if scaler is None:
        # No scaler supplied: fit a fresh one on the data being transformed.
        scaler = MinMaxScaler()
        scaled_values = scaler.fit_transform(features)
    else:
        scaled_values = scaler.transform(features)

    # The scaler returns a bare array; restore the column and index labels.
    X_scaled = pd.DataFrame(
        scaled_values,
        columns=features.columns,
        index=features.index,
    )
    return X_scaled, scaler
def predict(model, X_scaled):
    """Return the model's predictions for ``X_scaled``.

    The model's ``best_iteration`` attribute is forwarded to ``predict`` as
    ``num_iteration``.
    """
    best_round = model.best_iteration
    predictions = model.predict(X_scaled, num_iteration=best_round)
    return predictions
# Load the model and features.
# NOTE: these artifacts are unpickled, which executes whatever code the files
# contain -- only ship pickle files that come from a trusted source.
model_path = 'LightGBM_2 (1).pkl'
features_path = 'selected_features_2 (1).pkl'
model, selected_features = load_model_and_features(model_path, features_path)

# Streamlit app
st.title("LightGBM Inference App")

uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])

if uploaded_file is not None:
    new_data = pd.read_csv(uploaded_file, index_col='DATE_TIME', parse_dates=True)

    # preprocess_data() drops every row that contains a NaN (a missing
    # timestamp also counts, because the calendar features derived from it
    # are NaN). Filter those rows out here first, so the prediction array has
    # exactly one value per remaining row; otherwise writing it back into the
    # frame fails with a length-mismatch ValueError. The mask is positional,
    # so it also works when several rows share the same timestamp.
    complete_rows = new_data.notna().all(axis=1).to_numpy() & new_data.index.notna()
    dropped_rows = int((~complete_rows).sum())
    new_data = new_data.loc[complete_rows].copy()
    if dropped_rows:
        st.warning(
            f"Dropped {dropped_rows} row(s) with missing values before prediction."
        )

    if new_data.empty:
        # Nothing left to scale or predict on; report it instead of letting
        # the scaler fail on an empty frame.
        st.error("The uploaded file contains no complete rows to predict on.")
    else:
        # Preprocess the new data.
        # NOTE(review): no fitted scaler is passed, so preprocess_data() fits a
        # new MinMaxScaler on the uploaded data itself -- confirm this matches
        # how the model was trained; if a training-time scaler exists it
        # should be loaded and passed in here.
        X_scaled, scaler = preprocess_data(new_data, selected_features)

        # Make predictions
        y_pred = predict(model, X_scaled)

        # Add predictions to the dataframe
        new_data["DAILY_YIELD"] = y_pred

        # Display the dataframe with predictions
        st.write(new_data)

        # Offer the result, including the timestamp index, as a CSV download.
        csv = new_data.to_csv(index=True)
        st.download_button(
            label="Download Predictions",
            data=csv,
            file_name='predictions.csv',
            mime='text/csv'
        )