-
Notifications
You must be signed in to change notification settings - Fork 0
/
visualization.py
210 lines (183 loc) · 8.57 KB
/
visualization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
# Load the dataset once, at module import time. The path is relative, so the
# CSV is looked up relative to the current working directory of the process.
df = pd.read_csv("Crop_recommendation.csv")
def _render_figure(fig):
    """Display a Matplotlib figure in Streamlit, then close it.

    Streamlit re-executes the script on every widget interaction; figures
    that are never closed stay registered with pyplot and pile up in memory.
    """
    st.pyplot(fig)
    plt.close(fig)


def show_visualization():
    """Render the "Advanced Data Visualization" Streamlit page.

    Reads the module-level ``df`` DataFrame, draws a fixed-seed sample of at
    most 100 rows from it, and renders a series of interactive seaborn /
    pandas / Matplotlib plots. Each section has its own Streamlit widgets for
    choosing the numeric columns to plot. Sections that rely on the ``label``
    column are skipped (or drawn without hue) when that column is absent.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("Advanced Data Visualization")

    # Sample up to 100 rows; clamp so datasets smaller than 100 rows do not
    # make DataFrame.sample raise ValueError. The seed keeps reruns stable.
    df_sample = df.sample(n=min(100, len(df)), random_state=1)

    # Filter numeric columns for selection
    numeric_columns = df_sample.select_dtypes(include=['float64', 'int64']).columns

    # Several sections colour or facet by the 'label' column; check once.
    has_label = 'label' in df.columns

    # 1. Heatmap with Annotations
    st.subheader("Heatmap with Annotations")
    heatmap_feature = st.multiselect("Select Features for Heatmap with Annotations", numeric_columns)
    if heatmap_feature:
        fig, ax = plt.subplots()
        sns.heatmap(df_sample[heatmap_feature].corr(), annot=True, cmap='coolwarm', ax=ax)
        ax.set_title('Heatmap with Annotations')
        _render_figure(fig)

    # 2. Pair Grid
    st.subheader("Pair Grid")
    pairgrid_features = st.multiselect("Select Features for Pair Grid", numeric_columns)
    if pairgrid_features:
        g = sns.PairGrid(df_sample[pairgrid_features])
        g.map_lower(sns.scatterplot)
        g.map_diag(sns.kdeplot)
        _render_figure(g.fig)

    # 3. Regression Plot
    st.subheader("Regression Plot")
    reg_x = st.selectbox("Select X-axis for Regression Plot", numeric_columns)
    reg_y = st.selectbox("Select Y-axis for Regression Plot", numeric_columns)
    if reg_x and reg_y:
        fig, ax = plt.subplots()
        sns.regplot(x=df_sample[reg_x], y=df_sample[reg_y], ax=ax, color='blue')
        ax.set_title(f'Regression Plot: {reg_x} vs {reg_y}')
        _render_figure(fig)

    # 4. Residual Plot
    st.subheader("Residual Plot")
    res_x = st.selectbox("Select X-axis for Residual Plot", numeric_columns)
    res_y = st.selectbox("Select Y-axis for Residual Plot", numeric_columns)
    if res_x and res_y:
        fig, ax = plt.subplots()
        sns.residplot(x=df_sample[res_x], y=df_sample[res_y], ax=ax, color='orange')
        ax.set_title(f'Residual Plot: {res_x} vs {res_y}')
        _render_figure(fig)

    # 5. Joint Plot with KDE
    st.subheader("Joint Plot with KDE")
    joint_x = st.selectbox("Select X for Joint Plot with KDE", numeric_columns)
    joint_y = st.selectbox("Select Y for Joint Plot with KDE", numeric_columns)
    if joint_x and joint_y:
        joint_grid = sns.jointplot(x=joint_x, y=joint_y, data=df_sample, kind='kde', color='purple')
        # jointplot returns a JointGrid; hand Streamlit the underlying figure,
        # as the other grid-based sections do.
        _render_figure(joint_grid.fig)

    # 6. Rug Plot with KDE Overlay
    st.subheader("Rug Plot with KDE Overlay")
    rug_feature = st.selectbox("Select Feature for Rug Plot with KDE", numeric_columns)
    if rug_feature:
        fig, ax = plt.subplots()
        sns.kdeplot(df_sample[rug_feature], ax=ax, color='red', fill=True)
        sns.rugplot(df_sample[rug_feature], ax=ax, color='blue')
        ax.set_title(f'Rug Plot with KDE Overlay of {rug_feature}')
        _render_figure(fig)

    # 7. Hexbin Plot
    st.subheader("Hexbin Plot")
    hex_x = st.selectbox("Select X-axis for Hexbin Plot", numeric_columns)
    hex_y = st.selectbox("Select Y-axis for Hexbin Plot", numeric_columns)
    if hex_x and hex_y:
        fig, ax = plt.subplots()
        hb = ax.hexbin(df_sample[hex_x], df_sample[hex_y], gridsize=30, cmap='inferno')
        fig.colorbar(hb, ax=ax)
        ax.set_xlabel(hex_x)
        ax.set_ylabel(hex_y)
        ax.set_title(f'Hexbin Plot: {hex_x} vs {hex_y}')
        _render_figure(fig)

    # 8. Andrews Curves (drawn from the full dataset, not the sample)
    st.subheader("Andrews Curves")
    if has_label:
        fig, ax = plt.subplots()
        pd.plotting.andrews_curves(df, 'label', ax=ax)
        ax.set_title('Andrews Curves')
        _render_figure(fig)

    # 9. Lag Plot
    st.subheader("Lag Plot")
    lag_feature = st.selectbox("Select Feature for Lag Plot", numeric_columns)
    if lag_feature:
        fig, ax = plt.subplots()
        lag_plot(df_sample[lag_feature], ax=ax)
        ax.set_title(f'Lag Plot of {lag_feature}')
        _render_figure(fig)

    # 10. Parallel Coordinates (drawn from the full dataset, not the sample)
    st.subheader("Parallel Coordinates")
    if has_label:
        fig, ax = plt.subplots()
        pd.plotting.parallel_coordinates(df, 'label', ax=ax)
        ax.set_title('Parallel Coordinates')
        _render_figure(fig)

    # 11. Bubble Plot
    st.subheader("Bubble Plot")
    bubble_x = st.selectbox("Select X-axis for Bubble Plot", numeric_columns)
    bubble_y = st.selectbox("Select Y-axis for Bubble Plot", numeric_columns)
    bubble_size = st.selectbox("Select Size Feature for Bubble Plot", numeric_columns)
    if bubble_x and bubble_y and bubble_size:
        fig, ax = plt.subplots()
        # Colour by label only when the column exists, instead of raising
        # KeyError on datasets without it.
        bubble_hue = df_sample['label'] if has_label else None
        sns.scatterplot(
            x=df_sample[bubble_x],
            y=df_sample[bubble_y],
            size=df_sample[bubble_size],
            hue=bubble_hue,
            ax=ax,
            palette='viridis' if has_label else None,
            sizes=(20, 200),
        )
        ax.set_xlabel(bubble_x)
        ax.set_ylabel(bubble_y)
        ax.set_title(f'Bubble Plot: {bubble_x} vs {bubble_y}')
        _render_figure(fig)

    # 12. Heatmap of Feature Pair Correlations
    st.subheader("Heatmap of Feature Pair Correlations")
    feature_pair_heatmap = st.multiselect("Select Features for Pair Correlation Heatmap", numeric_columns)
    if feature_pair_heatmap:
        fig, ax = plt.subplots()
        sns.heatmap(df_sample[feature_pair_heatmap].corr(), annot=True, cmap='coolwarm', ax=ax)
        ax.set_title('Heatmap of Feature Pair Correlations')
        _render_figure(fig)

    # 13. Contour Plot
    st.subheader("Contour Plot")
    contour_x = st.selectbox("Select X-axis for Contour Plot", numeric_columns)
    contour_y = st.selectbox("Select Y-axis for Contour Plot", numeric_columns)
    if contour_x and contour_y:
        fig, ax = plt.subplots()
        sns.kdeplot(x=df_sample[contour_x], y=df_sample[contour_y], ax=ax, fill=True)
        ax.set_title(f'Contour Plot: {contour_x} vs {contour_y}')
        _render_figure(fig)

    # 14. Density Plot with Multiple Features
    st.subheader("Density Plot with Multiple Features")
    density_features = st.multiselect("Select Features for Multi-density Plot", numeric_columns)
    if density_features:
        fig, ax = plt.subplots()
        for feature in density_features:
            sns.kdeplot(df_sample[feature], ax=ax, label=feature)
        ax.legend()
        ax.set_title('Density Plot with Multiple Features')
        _render_figure(fig)

    # 15. Custom Plot with Regression and Residual
    st.subheader("Custom Plot with Regression and Residual")
    custom_x = st.selectbox("Select X-axis for Custom Plot", numeric_columns)
    custom_y = st.selectbox("Select Y-axis for Custom Plot", numeric_columns)
    if custom_x and custom_y:
        fig, ax = plt.subplots()
        # Both layers share one Axes: the fit in blue, its residuals in red.
        sns.regplot(x=df_sample[custom_x], y=df_sample[custom_y], ax=ax, color='blue', label='Regression')
        sns.residplot(x=df_sample[custom_x], y=df_sample[custom_y], ax=ax, color='red', label='Residual')
        ax.set_title(f'Custom Plot with Regression and Residual: {custom_x} vs {custom_y}')
        ax.legend()
        _render_figure(fig)

    # 16. Facet Grid
    st.subheader("Facet Grid")
    facet_feature = st.selectbox("Select Feature for Facet Grid", numeric_columns)
    # The grid is faceted by 'label', so it can only be drawn when that
    # column is present.
    if facet_feature and has_label:
        g = sns.FacetGrid(df_sample, col='label', height=4, aspect=2)
        g.map(sns.histplot, facet_feature)
        _render_figure(g.fig)

    # 17. Matrix Plot
    st.subheader("Matrix Plot")
    matrix_features = st.multiselect("Select Features for Matrix Plot", numeric_columns)
    if matrix_features:
        pairplot_fig = sns.pairplot(df_sample[matrix_features])
        pairplot_fig.fig.set_size_inches(12, 8)
        pairplot_fig.fig.suptitle('Matrix Plot', y=1.02)
        _render_figure(pairplot_fig.fig)

    # 18. Violin Plot
    st.subheader("Violin Plot")
    violin_x = st.selectbox("Select X-axis for Violin Plot", numeric_columns)
    violin_y = st.selectbox("Select Y-axis for Violin Plot", numeric_columns)
    if violin_x and violin_y:
        fig, ax = plt.subplots()
        sns.violinplot(x=df_sample[violin_x], y=df_sample[violin_y], ax=ax)
        ax.set_title(f'Violin Plot: {violin_x} vs {violin_y}')
        _render_figure(fig)

    # 19. Box Plot
    st.subheader("Box Plot")
    box_x = st.selectbox("Select X-axis for Box Plot", numeric_columns)
    box_y = st.selectbox("Select Y-axis for Box Plot", numeric_columns)
    if box_x and box_y:
        fig, ax = plt.subplots()
        sns.boxplot(x=df_sample[box_x], y=df_sample[box_y], ax=ax)
        ax.set_title(f'Box Plot: {box_x} vs {box_y}')
        _render_figure(fig)
# Render the page only when this file is run as the main script; importing
# the module elsewhere does not call show_visualization().
if __name__ == "__main__":
    show_visualization()