7
7
def calculate_total_cost(row):
    """
    Calculate the total cost for one row.

    Checks whether total_with_options_per_unit is present (greater than
    zero). If it is, that per-unit price is used; if not, the cost is
    calculated from contract_unit_price instead. The chosen per-unit price
    is multiplied by quantity.

    to be used with .assign()

    Args:
        row: any mapping-like object indexable by
            "total_with_options_per_unit", "contract_unit_price" and
            "quantity" (e.g. a DataFrame row when applied with axis=1).

    Returns:
        The selected per-unit price multiplied by row["quantity"].
    """
    # A NaN options price compares as "not greater than zero", so rows with
    # a missing options price fall back to the contract unit price.
    if row["total_with_options_per_unit"] > 0:
        unit_price = row["total_with_options_per_unit"]
    else:
        unit_price = row["contract_unit_price"]

    return unit_price * row["quantity"]
16
-
17
- def new_bus_size_finder (item_description ):
17
+
18
+
19
+ def new_bus_size_finder (description : str ) -> str :
18
20
"""
19
21
Similar to prop_type_find, matches keywords to item description col and return standardized bus size type.
22
+ now normalizes the description input to lowercase before matching.
20
23
To be used with .assign()
21
24
"""
22
25
23
26
articulated_list = [
24
- "60-foot" ,
25
27
"60 foot" ,
26
28
"articulated" ,
27
29
]
28
30
29
31
standard_bus_list = [
30
- "30- foot" ,
32
+ "30 foot" ,
31
33
"35 foot" ,
32
- "35 Foot" ,
33
34
"40 foot" ,
34
- "40-foot" ,
35
- "40 Foot" ,
36
35
"40ft" ,
37
36
"45 foot" ,
38
- "45-foot" ,
39
37
"standard" ,
40
38
]
41
39
@@ -46,84 +44,83 @@ def new_bus_size_finder(item_description):
46
44
other_bus_size_list = ["feeder bus" ]
47
45
48
46
otr_bus_list = [
49
- "coach- style" ,
50
- "over- the- road" ,
47
+ "coach style" ,
48
+ "over the road" ,
51
49
]
52
50
51
+ item_description = description .lower ().replace ("-" , " " ).strip ()
52
+
53
53
if any (word in item_description for word in articulated_list ):
54
54
return "articulated"
55
55
56
- if any (word in item_description for word in standard_bus_list ):
56
+ elif any (word in item_description for word in standard_bus_list ):
57
57
return "standard/conventional (30ft-45ft)"
58
58
59
- if any (word in item_description for word in cutaway_list ):
59
+ elif any (word in item_description for word in cutaway_list ):
60
60
return "cutaway"
61
61
62
- if any (word in item_description for word in otr_bus_list ):
62
+ elif any (word in item_description for word in otr_bus_list ):
63
63
return "over-the-road"
64
64
65
- if any (word in item_description for word in other_bus_size_list ):
65
+ elif any (word in item_description for word in other_bus_size_list ):
66
66
return "other"
67
67
68
- return "not specified"
68
+ else :
69
+ return "not specified"
69
70
70
71
71
72
# new prop_finder function
72
- def new_prop_finder (item_description ) :
73
+ def new_prop_finder (description : str ) -> str :
73
74
"""
74
75
function that matches keywords from each propulsion type list against the item description col, returns a standardized prop type
76
+ now normalizes the description input to lowercase before matching.
75
77
to be used with .assign()
76
78
"""
77
79
78
80
BEB_list = [
79
81
"battery electric" ,
80
- "battery-electric" ,
81
82
"BEBs paratransit buses" ,
82
- "Battery Electric Bus" ,
83
83
"battery electric bus" ,
84
+ 'battery electric buses' ,
85
+ 'battery electric buses' ,
84
86
]
85
87
86
88
cng_list = [
87
- "CNG buses" ,
88
- "CNG fueled" ,
89
- "estimated-CNG buses" ,
90
- "low emission CNG " ,
89
+ "cng buses" ,
90
+ "cng fueled" ,
91
+ "estimated cng buses" ,
92
+ "low emission cng " ,
91
93
]
92
94
93
95
electric_list = [
94
96
"electric buses" ,
95
97
"electric commuter" ,
96
- "Electric" ,
97
98
"electric" ,
98
99
]
99
100
100
101
FCEB_list = [
101
102
"fuel cell" ,
102
- "fuel-cell" ,
103
103
"fuel cell electric" ,
104
104
"hydrogen fuel cell" ,
105
- "Fuel Cell Electric Bus" ,
106
105
"fuel cell electric bus" ,
107
- "Hydrogen Electic Bus" ,
108
106
"hydrogen electric bus" ,
109
107
]
110
108
111
109
# low emission (hybrid)
112
110
hybrid_list = [
113
111
"diesel electric hybrids" ,
114
- "diesel-electric" ,
115
112
"diesel-electric hybrids" ,
116
- "hybrid" ,
117
113
"hybrid electric" ,
118
114
"hybrid electric buses" ,
119
115
"hybrid electrics" ,
116
+ "hybrid" ,
120
117
]
121
118
122
119
# low emission (propane)
123
120
propane_list = [
124
- "propane" ,
125
121
"propane buses" ,
126
122
"propaned powered vehicles" ,
123
+ "propane" ,
127
124
]
128
125
129
126
mix_beb_list = [
@@ -142,44 +139,48 @@ def new_prop_finder(item_description):
142
139
]
143
140
144
141
zero_e_list = [
145
- "zero‐emission" ,
146
- "zero-emission buses" ,
147
- "zero emission" ,
148
142
"zero emission buses" ,
149
143
"zero emission electric" ,
144
+ "zero emission vehicles" ,
145
+ "zero emission" ,
150
146
]
151
147
152
- if any (word in item_description for word in BEB_list ):
153
- return "BEB"
148
+ item_description = description .lower ().replace ("-" "" , " " ).replace ("‐" , " " ).strip ()
154
149
155
- if any (word in item_description for word in cng_list ):
156
- return "CNG"
150
+ if any (word in item_description for word in BEB_list ) and not any (
151
+ word in item_description for word in ["diesel" , "hybrid" , "fuel cell" ]
152
+ ):
153
+ return "BEB"
157
154
158
- if any (word in item_description for word in FCEB_list ):
155
+ elif any (word in item_description for word in FCEB_list ):
159
156
return "FCEB"
160
157
161
- if any (word in item_description for word in hybrid_list ):
158
+ elif any (word in item_description for word in hybrid_list ):
162
159
return "low emission (hybrid)"
163
160
164
- if any (word in item_description for word in propane_list ):
165
- return "low emission (propane)"
166
-
167
- if any (word in item_description for word in mix_beb_list ):
161
+ elif any (word in item_description for word in mix_beb_list ):
168
162
return "mix (BEB and FCEB)"
169
163
170
- if any (word in item_description for word in mix_lowe_list ):
164
+ elif any (word in item_description for word in mix_lowe_list ):
171
165
return "mix (low emission)"
172
166
173
- if any (word in item_description for word in mix_zero_low_list ):
167
+ elif any (word in item_description for word in mix_zero_low_list ):
174
168
return "mix (zero and low emission)"
175
169
176
- if any (word in item_description for word in zero_e_list ):
170
+ elif any (word in item_description for word in zero_e_list ):
177
171
return "zero-emission bus (not specified)"
178
172
179
- if any (word in item_description for word in electric_list ):
173
+ elif any (word in item_description for word in propane_list ):
174
+ return "low emission (propane)"
175
+
176
+ elif any (word in item_description for word in electric_list ):
180
177
return "electric (not specified)"
178
+
179
+ elif any (word in item_description for word in cng_list ):
180
+ return "CNG"
181
181
182
- return "not specified"
182
+ else :
183
+ return "not specified"
183
184
184
185
185
186
# included assign columns
@@ -226,6 +227,7 @@ def clean_dgs_columns() -> pd.DataFrame:
226
227
"total_with_options_per_unit" ,
227
228
"grand_total" ,
228
229
]
230
+
229
231
# read in data
230
232
dgs_17c = pd .read_excel (f"{ gcs_path } { file_17c } " , sheet_name = sheet_17c )
231
233
dgs_17b = pd .read_excel (f"{ gcs_path } { file_17b } " , sheet_name = sheet_17b )
@@ -242,19 +244,18 @@ def clean_dgs_columns() -> pd.DataFrame:
242
244
243
245
# takes list of columns and updates to int64
244
246
dgs_17bc [to_int64 ] = dgs_17bc [to_int64 ].astype ("int64" )
245
-
247
+
246
248
# change purchase_order_number col to str
247
- dgs_17bc [' purchase_order_number' ] = dgs_17bc [' purchase_order_number' ].astype (' str' )
249
+ dgs_17bc [" purchase_order_number" ] = dgs_17bc [" purchase_order_number" ].astype (" str" )
248
250
249
251
# adds 3 new columns from functions
250
252
dgs_17bc2 = dgs_17bc .assign (
251
253
total_cost = dgs_17bc .apply (calculate_total_cost , axis = 1 ),
252
254
new_prop_type = dgs_17bc ["item_description" ].apply (new_prop_finder ),
253
255
new_bus_size = dgs_17bc ["item_description" ].apply (new_bus_size_finder ),
254
256
)
255
-
256
- return dgs_17bc2
257
257
258
+ return dgs_17bc2
258
259
259
260
def agg_by_agency (df : pd .DataFrame ) -> pd .DataFrame :
260
261
"""
@@ -338,8 +339,8 @@ def agg_by_agency_w_options(df: pd.DataFrame) -> pd.DataFrame:
338
339
339
340
return merge
340
341
341
-
342
342
if __name__ == "__main__" :
343
+
343
344
# initial df
344
345
df1 = clean_dgs_columns ()
345
346
0 commit comments