@@ -51,7 +51,11 @@ def __iter__(self):
5151def load_everything_into_ram (pv_power_filename , pv_metadata_filename ) -> xr .DataArray :
5252 """Open AND load PV data into RAM."""
5353 # Load pd.DataFrame of power and pd.Series of capacities:
54- pv_power_watts , pv_capacity_watt_power , pv_system_row_number = _load_pv_power_watts_and_capacity_watt_power (
54+ (
55+ pv_power_watts ,
56+ pv_capacity_watt_power ,
57+ pv_system_row_number ,
58+ ) = _load_pv_power_watts_and_capacity_watt_power (
5559 pv_power_filename ,
5660 )
5761 pv_metadata = _load_pv_metadata (pv_metadata_filename )
@@ -108,7 +112,8 @@ def _load_pv_power_watts_and_capacity_watt_power(
108112 pv_capacity_watt_power .index = [np .int32 (col ) for col in pv_capacity_watt_power .index ]
109113 pv_power_watts .columns = pv_power_watts .columns .astype (np .int64 )
110114
111- # Create pv_system_row_number. We use the index of `pv_capacity_watt_power` because that includes
115+ # Create pv_system_row_number. We use the index of
116+ # `pv_capacity_watt_power` because that includes
112117 # the PV system IDs for the entire dataset (independent of `start_date` and `end_date`).
113118 # We use `float32` for the ID because we use NaN to indicate a missing PV system,
114119 # or that this whole example doesn't include PV.
@@ -134,7 +139,9 @@ def _load_pv_power_watts_and_capacity_watt_power(
134139
135140 # Drop any PV systems whose PV capacity is too low:
136141 PV_CAPACITY_THRESHOLD_W = 100
137- pv_systems_to_drop = pv_capacity_watt_power .index [pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W ]
142+ pv_systems_to_drop = pv_capacity_watt_power .index [
143+ pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W
144+ ]
138145 pv_systems_to_drop = pv_systems_to_drop .intersection (pv_power_watts .columns )
139146 _log .info (
140147 f"Dropping { len (pv_systems_to_drop )} PV systems because their max power is less than"
@@ -164,53 +171,6 @@ def _load_pv_power_watts_and_capacity_watt_power(
164171 return pv_power_watts , pv_capacity_watt_power , pv_system_row_number
165172
166173
167- """Filtering to be added in a different IterDataPipe
168-
169- pv_power_watts = pv_power_watts.clip(lower=0, upper=5e7)
170- # Convert the pv_system_id column names from strings to ints:
171- pv_power_watts.columns = [np.int32(col) for col in pv_power_watts.columns]
172-
173- if "passiv" not in filename:
174- _log.warning("Converting timezone. ARE YOU SURE THAT'S WHAT YOU WANT TO DO?")
175- pv_power_watts = (
176- pv_power_watts.tz_localize("Europe/London").tz_convert("UTC").tz_convert(None)
177- )
178-
179- pv_power_watts = _drop_pv_systems_which_produce_overnight(pv_power_watts)
180-
181- # Resample to 5-minutely and interpolate up to 15 minutes ahead.
182- # TODO: Issue #74: Give users the option to NOT resample (because Perceiver IO
183- # doesn't need all the data to be perfectly aligned).
184- pv_power_watts = pv_power_watts.resample("5T").interpolate(method="time", limit=3)
185- pv_power_watts.dropna(axis="index", how="all", inplace=True)
186- pv_power_watts.dropna(axis="columns", how="all", inplace=True)
187-
188- # Drop any PV systems whose PV capacity is too low:
189- PV_CAPACITY_THRESHOLD_W = 100
190- pv_systems_to_drop = pv_capacity_watt_power.index[pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
191- pv_systems_to_drop = pv_systems_to_drop.intersection(pv_power_watts.columns)
192- _log.info(
193- f"Dropping {len(pv_systems_to_drop)} PV systems because their max power is less than"
194- f" {PV_CAPACITY_THRESHOLD_W}"
195- )
196- pv_power_watts.drop(columns=pv_systems_to_drop, inplace=True)
197-
198- # Ensure that capacity and pv_system_row_num use the same PV system IDs as the power DF:
199- pv_system_ids = pv_power_watts.columns
200- pv_capacity_watt_power = pv_capacity_watt_power.loc[pv_system_ids]
201- pv_system_row_number = pv_system_row_number.loc[pv_system_ids]
202-
203- _log.info(
204- "After filtering & resampling to 5 minutes:"
205- f" pv_power = {pv_power_watts.values.nbytes / 1e6:,.1f} MBytes."
206- f" {len(pv_power_watts)} PV power datetimes."
207- f" {len(pv_power_watts.columns)} PV power PV system IDs."
208- )
209-
210-
211- """
212-
213-
214174# Adapted from nowcasting_dataset.data_sources.pv.pv_data_source
215175def _load_pv_metadata (filename : str ) -> pd .DataFrame :
216176 """Return pd.DataFrame of PV metadata.
0 commit comments