forked from AtharvaKhedkar/AutoML
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
154 lines (127 loc) · 5.01 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import streamlit as st
import pandas as pd
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import seaborn as sns
# The wildcard import keeps its original position: moving it relative to the
# other imports could change which binding wins for a clashing name.
from pycaret.regression import *
import base64
import html
import io
from classification import build_classifier
#---------------------------------#
# Page layout: browser-tab title plus the 'wide' layout so the page expands to full width.
st.set_page_config(
    page_title='The Automatic Machine Learning App',
    layout='wide',
)
#---------------------------------#
# Model building
def build_model(df, removedfeatures):
    """Show dataset diagnostics, train candidate models and render the results.

    Reads two module-level globals that the calling script assigns before
    invoking this function: ``label`` (name of the target column) and
    ``usecase`` (``"classification"`` selects the classifier pipeline, any
    other value the PyCaret regression pipeline).

    Args:
        df: Input DataFrame that includes the target column.
        removedfeatures: Column names to exclude from the features. The
            sequence is not modified.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Work on a fresh set so the caller's list (a Streamlit widget value) is
    # never mutated; the set also absorbs the case where the target itself was
    # picked for removal.
    dropped = set(removedfeatures or []) | {label}
    X = df.drop(columns=list(dropped))
    Y = df[label]
    if X.shape[1] == 0:
        st.error('No feature columns are left after removal; keep at least one feature.')
        return

    st.markdown('**1.3. Dataset dimension**')
    st.write('X')
    st.info(X.shape)
    st.write('Y')
    st.info(Y.shape)

    st.markdown('**1.4. Variable details**:')
    st.write('X variable (first 20 are shown)')
    st.info(list(X.columns[:20]))
    st.write('Y variable')
    st.info(Y.name)

    st.markdown('**1.5. Data Correlation**:')
    # Selected features plus target: used for the heatmap AND for training, so
    # the removed features really are excluded from the models.
    train_df = pd.concat([X, Y], axis=1)
    # Select numeric/bool columns explicitly; pandas >= 2.0 raises on text
    # columns in corr() instead of silently skipping them.
    numeric_df = train_df.select_dtypes(include=['number', 'bool'])
    if numeric_df.shape[1] == 0:
        st.info('No numeric columns available for a correlation heatmap.')
    else:
        fig, ax = plt.subplots()
        sns.heatmap(numeric_df.corr(), ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure alive until closed; release it so figures
        # do not pile up across Streamlit reruns.
        plt.close(fig)

    # Build model
    st.write('Building your models, Please Wait....')
    is_classification = usecase == "classification"
    if is_classification:
        table = build_classifier(train_df, label)
    else:
        # NOTE(review): `silent` appears to be a PyCaret 2.x-only argument --
        # confirm against the pinned PyCaret version.
        setup(data=train_df, target=label, silent=True)
        compare_models()
        table = pull()

    st.subheader('2. Table of Model Performance')
    # Show the table that was actually produced for this use case rather than
    # pulling the regression module's display buffer a second time.
    st.write(table)
    st.markdown(filedownload(table, 'model_comparison.csv'), unsafe_allow_html=True)

    st.subheader('3. Plot of Model Performance')
    if is_classification:
        metric, metric_label, plot_name = 'Accuracy', 'Accuracy', 'accuracy_comparison'
    else:
        metric, metric_label, plot_name = 'R2', 'R Square', 'r2_comparison'
    top_models = table.head()
    fig, ax = plt.subplots(figsize=(15, 6))
    ax.bar(top_models['Model'], top_models[metric])
    ax.set_xlabel('Models')
    ax.set_ylabel(metric_label)
    st.pyplot(fig)
    st.markdown(imagedownload(fig, plot_name), unsafe_allow_html=True)
    plt.close(fig)
def filedownload(df, filename):
    """Return an HTML anchor that downloads ``df`` as a CSV file.

    The CSV text (index included) is embedded in the link as a base64 data
    URI, so nothing is written to disk.

    Args:
        df: Object exposing ``to_csv`` (a pandas DataFrame in this app).
        filename: Name offered to the browser for the downloaded file.

    Returns:
        str: The ``<a>`` element as an HTML string.
    """
    csv_text = df.to_csv(index=True)
    payload = base64.b64encode(csv_text.encode()).decode()  # str -> bytes -> base64 str
    # Escape and quote the name: left unquoted, a file name containing spaces
    # or markup characters would break the attribute or inject HTML.
    safe_name = html.escape(str(filename), quote=True)
    return (
        f'<a href="data:text/csv;base64,{payload}" '
        f'download="{safe_name}">Download {safe_name} File</a>'
    )
def imagedownload(plt, filename):
    """Return an HTML anchor that downloads a figure as a PNG image.

    The figure is rendered into memory and embedded in the link as a base64
    data URI.

    Args:
        plt: Object exposing ``savefig``; the caller in this file passes a
            matplotlib figure. The parameter name shadows the pyplot alias
            inside this function and is kept only for backward compatibility.
        filename: Base name for the download. ``.png`` is appended to the
            download name when it is missing, so the saved file opens as an
            image.

    Returns:
        str: The ``<a>`` element as an HTML string.
    """
    buffer = io.BytesIO()
    plt.savefig(buffer, format='png', bbox_inches='tight')
    payload = base64.b64encode(buffer.getvalue()).decode()  # bytes -> base64 str

    shown_name = str(filename)
    download_name = shown_name
    if not download_name.lower().endswith('.png'):
        download_name += '.png'

    # Escape and quote the name: left unquoted, a file name containing spaces
    # or markup characters would break the attribute or inject HTML.
    return (
        f'<a href="data:image/png;base64,{payload}" '
        f'download="{html.escape(download_name, quote=True)}">'
        f'Download {html.escape(shown_name, quote=True)} Image</a>'
    )
#---------------------------------#
# "Fork me on GitHub" ribbon: an image link positioned at the top-right of the page.
github_fork = (
    '<a href="https://github.com/AtharvaKhedkar/AutoMl" target="_blank">'
    '<img style="position: absolute; top: 0; right: 30px; border: 0;"'
    ' src="https://github.com/jamesflorentino/fork-ribbons/raw/master/ribbons/violet-white.png"'
    ' alt="Fork me on GitHub">'
    '</a>'
)
# The ribbon is raw HTML, hence unsafe_allow_html.
st.write(github_fork, unsafe_allow_html=True)
# Page title, written as a level-1 Markdown heading.
st.write("\n# The Automatic Machine Learning App\n")
#---------------------------------#
# Sidebar - collects the data source (uploaded file or pasted link) and the task type.
# The headers are plain elements, not containers, so they are called directly
# instead of being used as `with` blocks; every widget below targets
# st.sidebar explicitly.
st.sidebar.header('1. Upload your CSV data')
uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"])
st.sidebar.subheader('OR')
st.sidebar.header('Paste CSV data link here')
# Pasted links often carry stray whitespace; strip it before validating.
uploaded_text = st.sidebar.text_input("Paste csv data link here").strip()
if uploaded_text:
    lowered = uploaded_text.lower()
    if lowered.startswith(('http://', 'https://')) and lowered.endswith('.csv'):
        # A valid link takes precedence over an uploaded file.
        uploaded_file = uploaded_text
    else:
        st.sidebar.error('Please enter a valid csv link')
# Sidebar - Specify parameter settings
st.sidebar.header('2. Set Parameters')
usecase = st.sidebar.selectbox(
    'Select dataset type (Regression/Classification)',
    ['regression', 'classification'],
)
#---------------------------------#
# Main panel
# Displays the dataset and starts training on request
st.subheader('1. Dataset')
if uploaded_file is not None:
    # An uploaded file is a file-like object and is rewound before reading;
    # a pasted link is a plain string and is handed to pandas as-is.
    if not isinstance(uploaded_file, str):
        uploaded_file.seek(0)
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.ParserError, pd.errors.EmptyDataError,
            UnicodeDecodeError, OSError) as err:
        # OSError also covers unreachable links (URLError/HTTPError derive from it).
        df = None
        st.error(f'Could not read the CSV data: {err}')
    if df is not None:
        st.markdown('**1.1. Glimpse of dataset**')
        st.write(df)
        label = st.sidebar.selectbox('Select target attribute', df.columns)
        removedfeatures = st.sidebar.multiselect(
            'Remove unnecessary features',
            df.columns)
        button = st.sidebar.button('Train Models')
        if label is not None and button:
            # Hand over a copy so the widget's value cannot be mutated by the callee.
            build_model(df, list(removedfeatures))
else:
    st.info('Awaiting for CSV file to be uploaded.')
    if st.button('Press to use Example Dataset'):
        # NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm
        # the pinned scikit-learn version or switch to another example dataset.
        boston = load_boston()
        X = pd.DataFrame(boston.data, columns=boston.feature_names)
        Y = pd.Series(boston.target, name='Price')
        # build_model reads `label` and `usecase` as module-level globals.
        label = 'Price'
        usecase = 'regression'
        df = pd.concat([X, Y], axis=1)
        st.markdown('The Boston housing dataset is used as the example.')
        st.write(df.head(5))
        build_model(df, removedfeatures=[])
# Sidebar footer - author credit. The subheader is a plain element, not a
# container, so it is called directly rather than used as a `with` block.
st.sidebar.subheader('Created by:')
st.sidebar.markdown('''[Atharva Khedkar](https://linktr.ee/atharvakhedkar/)''')