-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_lorenzo.py
242 lines (182 loc) · 11.3 KB
/
test_lorenzo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
import streamlit as st
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
# --- Page heading and data loading ---
st.title("Art Valuation App \U0001F3A8")
st.header('CS Project by Group 2.2')

# Cleaned artwork table read from a fixed local path.
df = pd.read_csv(r"C:\Users\loren\OneDrive\Documents\5 Semester\CS\Groupe project\cleaned_tow.csv")

# Raw column name -> display label used everywhere else in the app.
names_new_columns = {
    'artist_name': 'Artist Name',
    'year_artist_born': 'Birth year',
    'age': 'Age',
    'length': 'Length',
    'width': 'Width',
    'title_of_work': 'Title',
    'height': 'Height',
    'year_of_work': 'Creation Year',
    'age_at_work': 'Age of Artist at Creation',
    'lower_est_USD': 'Lower est. USD',
    'upper_est_USD': 'Upper est. USD',
    'sold_in_USD': 'Sales Amount in USD',
    'Material_Category': 'Material Category',
    'act_area': 'Act. Area',
    'posthumous_combined': 'Posthumous Combined',
}
df = df.rename(columns=names_new_columns)
# --- Slider: filter artworks by year of work ---
st.header('Filter for artworks by year')

# Creation Year must be numeric, otherwise the range comparison would mix
# ints with strings; values that cannot be parsed become NaN.
df['Creation Year'] = pd.to_numeric(df['Creation Year'], errors='coerce')

# Slider bounds come from the data; fall back to 0..0 when no year is usable.
creation_years = df['Creation Year']
if creation_years.isnull().all():
    min_year = 0
    max_year = 0
else:
    min_year = int(creation_years.min())
    max_year = int(creation_years.max())
selected_years = st.slider("Select a range of years", min_year, max_year, (min_year, max_year))

# Keep the rows whose year lies inside the selected range (both ends
# inclusive); rows with a NaN year never satisfy the comparison.
year_from, year_to = selected_years[0], selected_years[1]
filtered_df = df[df['Creation Year'].between(year_from, year_to)]
st.write("Filtered Artworks by Year:")
st.write(filtered_df)

# Counter for the number of artworks in the selected range
num_artworks = len(filtered_df)
st.markdown(f"Number of Artworks in Selected Range: **{num_artworks}**")
# --- Text input: search for an artist by (partial) name ---
# The search narrows `filtered_df`, i.e. the result of the year filter above.
st.header('Search for an artist by name')
artist_name_input = st.text_input("Please enter an artist's name (or part of the name):")
if artist_name_input:
    # regex=False: match the typed text literally, so characters such as
    #   "(" or "*" do not raise a regular-expression error.
    # na=False: rows with a missing artist name count as "no match" instead
    #   of putting NaN into the boolean mask (which cannot be used to index).
    name_matches = filtered_df['Artist Name'].str.contains(
        artist_name_input, case=False, regex=False, na=False
    )
    filtered_df = filtered_df[name_matches]

# Show search result summary
st.subheader("Results of your Search")
if artist_name_input:
    st.write(f"Showing artworks by artist(s) containing '{artist_name_input}':")
    st.write(filtered_df)
else:
    st.write("Please enter an artist's name (or part of it) to perform a search.")
# --- Filter artworks by sale price, optionally posthumous works only ---
st.header('Filter for artworks by price')

# Checkbox to restrict the result to posthumous artworks.
posthumous_checkbox = st.checkbox("Filter Posthumous Artworks")

# Sale prices are used as-is; no cleaning or type conversion happens here.
sold_in_usd = df['Sales Amount in USD']

# Slider bounds come from the data; the default selection starts at 0.
min_price, max_price = st.slider(
    "Filter by Price Range",
    min_value=float(sold_in_usd.min()),
    max_value=float(sold_in_usd.max()),
    value=(float(0), float(sold_in_usd.max())),
)

# Filter a copy so the shared `df` is never modified by this section.
filtered_2_df = df.copy()
if posthumous_checkbox:
    # Only rows flagged 1 (True) are considered posthumous.
    filtered_2_df = filtered_2_df[filtered_2_df['Posthumous Combined'].isin([1])]

# The price column is already part of the copy, so it is not re-assigned
# from `sold_in_usd`: that assignment was a no-op on the values and could
# trigger SettingWithCopyWarning on the posthumous-filtered frame.
# `between` is inclusive on both ends, matching the original >= / <= test.
price_in_range = filtered_2_df['Sales Amount in USD'].between(min_price, max_price)
filtered_2_df = filtered_2_df[price_in_range]

# Display the dataframe
st.write("Filtered Artworks by Price:")
st.write(filtered_2_df)

# Counter for the number of artworks in the selected range
num_artworks_price = filtered_2_df.shape[0]
st.markdown(f"Number of Artworks in Selected Range: **{num_artworks_price}**")
# --- Slider: filter artworks by size (Act. Area column) ---
st.header('Filter for artworks by Size')

# Area values taken straight from the main table.
act_area = df['Act. Area']
largest_area = float(act_area.max())

# Range slider bounded by the data; the default selection starts at 0.
min_area, max_area = st.slider(
    "Filter by Size Range",
    min_value=float(act_area.min()),
    max_value=largest_area,
    value=(float(0), largest_area),
)

# Work on a copy of the table and keep rows inside the chosen range
# (inclusive on both ends).
filtered_3_df = df.copy()
filtered_3_df['Act. Area'] = act_area
area_in_range = filtered_3_df['Act. Area'].between(min_area, max_area)
filtered_3_df = filtered_3_df[area_in_range]

# Display the dataframe
st.write("Filtered Artworks by Size:")
st.write(filtered_3_df)

# Counter for the number of artworks in the selected range
num_artworks_size = len(filtered_3_df)
st.markdown(f"Number of Artworks in Selected Range: **{num_artworks_size}**")
# --- Multi-select: filter by Material Category (used ChatGPT) ---
st.header('Filter for artworks by Material Category')

# Category label -> numeric code; codes are assigned 1..10 in listing order.
filter_options = {
    label: code
    for code, label in enumerate(
        [
            'Paints and Pigments',
            'Metals and Sculpting Materials',
            'Printmaking and Graphic Arts',
            'Drawing and Writing Tools',
            'Mixed Media and Miscellaneous',
            'Traditional and Specialized Techniques',
            'Photography and Digital Art',
            'Sculpting and Carving',
            'Unconventional and Unique Techniques',
            'Other Specific Techniques or Materials',
        ],
        start=1,
    )
}

# Multi-select widget offering the category labels.
selected_categories = st.multiselect("Select Material Category", filter_options.keys())
# --- Optional panel: what each material category contains ---
# Each list starts with an introductory phrase followed by the material
# keywords belonging to that category.
categories = {
    'Paints and Pigments': ['This category contains artworks made of','oil', 'acrylic', 'spray', 'pastel', 'tempera', 'watercolor', 'watercolour', 'pigment', 'color'],
    'Metals and Sculpting Materials' : ['This category contains artworks made of','bronze', 'gold', 'patinated', 'wrought', 'alabaster', 'polished', 'copper', 'stainless', 'wood', 'cast', 'silver', 'steel', 'aluminum', 'walnut', 'plumbago', 'stone', 'corten', 'metallised', 'lead', 'burnished', 'brass', 'earthenware'],
    'Printmaking and Graphic Arts': ['This category contains artworks made of','cprint', 'offset', 'charcoal', 'screenprint', 'letterpress', 'etching'],
    'Drawing and Writing Tools': ['This category contains artworks made of','ballpoint', 'pen', 'signed', 'crayon', 'pencil', 'conte', 'marker', 'felttip'],
    'Mixed Media and Miscellaneous': ['This category contains artworks made of','embroidery', 'synthetic', 'variable', 'mixed', 'ikb', 'paper', 'painted', 'polymer', 'glazed', 'alkyd', 'resin', 'polyuréthane', 'household', 'digital', 'book', 'fabric', 'gelatin', 'sump', 'vinyl', 'polyester', 'dye', 'offsetlithographin', 'monoprint', 'gypsum', 'printed', 'vegetable', 'archival', 'wool', 'digitally', 'enamel', 'monotype', 'cardboard', 'roll', 'woodcut', 'cement', 'lacquer', 'mirrored', 'oilstick', 'collage', 'pva', 'metal', 'bamboo', 'wires', 'wire', 'sand', 'epson', 'ektacolor', 'unframed', 'peacock', 'plastic', 'flashe', 'water', 'oxidation', 'porcelain', 'molave', 'handknitted', 'embossed', 'handembellished', 'polystone', 'silk', 'vacuumformed', 'cobblestones', 'feathers', 'diasecmounted', 'ukiyoe', 'giclee', 'giclée', 'handpainted', 'homemade', 'cotton', 'duratran', 'indian', 'varnished', 'handcut', 'fibreglass', 'vintage', 'humbrol'],
    'Traditional and Specialized Techniques': ['This category contains artworks made of','silkscreen', 'encaustic', 'terracotta', 'leather', 'screen', 'lambda', 'glass'],
    'Photography and Digital Art': ['This category contains artworks made of','ilfochrome', 'chromogenic', 'ink', 'cibachrome', 'photogram', 'inkjet', 'stencil', 'tar', 'photographic', 'epoxy', 'plaster'],
    'Sculpting and Carving': ['This category contains artworks made of','graphite', 'wax', 'chalk'],
    'Unconventional and Unique Techniques': ['This category contains artworks made of','language', 'led', 'electronic', 'neon', 'kinetic'],
    'Other Specific Techniques or Materials': ['This category contains artworks made of','Other Specific Techniques or Materials: Any material that doesnt fit neatly into the above categories'],
}

# The details are only rendered while this checkbox is ticked.
show_more_info = st.checkbox("Show More Information")
if show_more_info:
    st.write("Select Categories for More Information:")
    selected_info_categories = st.multiselect("Select Categories", categories)
    # One heading plus the keyword list for every category the user picked.
    for chosen_category in selected_info_categories:
        st.write(f"Additional Information for Category {chosen_category}:")
        st.write(categories.get(chosen_category, "No information available"))
# Translate the chosen category labels into their numeric codes.
selected_values = [filter_options[label] for label in selected_categories]

# Keep only rows whose category code was selected; with nothing selected
# the result is an empty table.
category_selected = df['Material Category'].isin(selected_values)
filtered_4_df = df[category_selected]

# Display the filtered DataFrame
st.write("Filtered Artworks by Material Category:")
st.write(filtered_4_df)

# Counter for the number of artworks in the selected range
num_artworks_material = len(filtered_4_df)
st.markdown(f"Number of Artworks in Selected Range: **{num_artworks_material}**")
### Section 2: making sales-price predictions
# Second page-level title; everything rendered after it belongs to the
# prediction part of the app.
st.title('Artwork Sales Prediction App')
def load_data(path=r"C:\Users\loren\OneDrive\Documents\5 Semester\CS\Groupe project\My first appprosper_data_app_dev.csv"):
    """Read the dataset CSV and return it without incomplete rows.

    Args:
        path: Location of the CSV file. Defaults to the original hard-coded
            location, so existing ``load_data()`` calls behave as before.

    Returns:
        A DataFrame holding only the rows that have no missing value in any
        column.
    """
    data = pd.read_csv(path)
    # Any row with at least one NaN is discarded.
    return data.dropna()
def load_model(filename=r"C:\Users\loren\OneDrive\Documents\5 Semester\CS\Groupe project\finalized_artworks_prediction.sav"):
    """Load the pickled prediction model from disk.

    Args:
        filename: Path of the pickle file. Defaults to the original
            hard-coded location, so existing ``load_model()`` calls behave
            as before.

    Returns:
        Whatever object was stored in the pickle file.
    """
    # SECURITY: unpickling can execute arbitrary code; only point this at
    # model files you created yourself or otherwise trust.
    # The context manager closes the file handle even if unpickling fails.
    with open(filename, "rb") as model_file:
        return pickle.load(model_file)
# --- Load the dataset and the trained model ---
data = load_data()
model = load_model()
# Bare expression kept on purpose: presumably displayed through Streamlit's
# "magic" rendering of standalone values - confirm magic is enabled.
data

# --- User input: one widget per model feature ---
# Widget labels are the raw feature names.
birth_year_input = st.number_input('year_artist_born', min_value=0, max_value=2023, step=1)
age_input = st.number_input('age', min_value=0, max_value=120, step=1)
length_input = st.number_input('length', min_value=0.00, step=0.001)
width_input = st.number_input('width', min_value=0.00, step=0.001)
height_input = st.number_input('height', min_value=0.00, step=0.001)
creation_year_input = st.number_input('year_of_work', min_value=0, max_value=2023, step=1)
age_at_creation_input = st.number_input('age_at_work', min_value=0, max_value=120, step=1)
lower_est_usd_input = st.number_input('lower_est_USD', min_value=0.00, step=0.01)
upper_est_usd_input = st.number_input('upper_est_USD', min_value=0.00, step=0.01)
# The options are the numeric codes (dict values), not the category labels (keys).
material_category_input = st.selectbox('Material_Category', filter_options.values())
# NOTE(review): product of all three dimensions, so it is 0 whenever any one
# of them is 0 - confirm this matches how act_area was built for training.
act_area_input = length_input * width_input * height_input
posthumous_combined_input = st.selectbox('posthumous_combined', [0, 1])
artist_rank_input = st.number_input('artist_rank', min_value=0, step=1)

# --- Assemble the inputs into a one-row frame and predict ---
# Key order defines the column order handed to the model.
feature_row = {
    'year_artist_born': birth_year_input,
    'age': age_input,
    'length': length_input,
    'width': width_input,
    'height': height_input,
    'year_of_work': creation_year_input,
    'age_at_work': age_at_creation_input,
    'lower_est_USD': lower_est_usd_input,
    'upper_est_USD': upper_est_usd_input,
    'Material_Category': material_category_input,
    'act_area': act_area_input,
    'posthumous_combined': posthumous_combined_input,
    'artist_rank': artist_rank_input,
}
user_input_df = pd.DataFrame([feature_row])

input_prediction = model.predict(user_input_df)
st.write(input_prediction)