Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit 8735ff2

Browse files
committed
Fix lint errors
1 parent d7e8abd commit 8735ff2

19 files changed

+128
-89
lines changed

ocf_datapipes/convert/gsp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ def __iter__(self) -> NumpyBatch:
2727
BatchKey.gsp: xr_data.values,
2828
BatchKey.gsp_t0_idx: xr_data.attrs["t0_idx"],
2929
BatchKey.gsp_id: xr_data.gsp_id.values,
30-
BatchKey.gsp_capacity_megawatt_power: xr_data.isel(time_utc=0)["capacity_megawatt_power"].values,
30+
BatchKey.gsp_capacity_megawatt_power: xr_data.isel(time_utc=0)[
31+
"capacity_megawatt_power"
32+
].values,
3133
BatchKey.gsp_time_utc: datetime64_to_float(xr_data["time_utc"].values),
3234
}
3335

ocf_datapipes/load/gsp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,9 @@ def __iter__(self) -> xr.DataArray:
8585
# TODO: Try using `gsp_id_to_shape.geometry.envelope.centroid`. See issue #76.
8686
x_osgb=gsp_id_to_shape.geometry.centroid.x.astype(np.float32),
8787
y_osgb=gsp_id_to_shape.geometry.centroid.y.astype(np.float32),
88-
capacity_megawatt_power=gsp_pv_power_mw_ds.installedcapacity_mwp.data.astype(np.float32),
88+
capacity_megawatt_power=gsp_pv_power_mw_ds.installedcapacity_mwp.data.astype( # noqa
89+
np.float32
90+
),
8991
)
9092

9193
del gsp_id_to_shape, gsp_pv_power_mw_ds

ocf_datapipes/load/pv/pv.py

Lines changed: 10 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,11 @@ def __iter__(self):
5151
def load_everything_into_ram(pv_power_filename, pv_metadata_filename) -> xr.DataArray:
5252
"""Open AND load PV data into RAM."""
5353
# Load pd.DataFrame of power and pd.Series of capacities:
54-
pv_power_watts, pv_capacity_watt_power, pv_system_row_number = _load_pv_power_watts_and_capacity_watt_power(
54+
(
55+
pv_power_watts,
56+
pv_capacity_watt_power,
57+
pv_system_row_number,
58+
) = _load_pv_power_watts_and_capacity_watt_power(
5559
pv_power_filename,
5660
)
5761
pv_metadata = _load_pv_metadata(pv_metadata_filename)
@@ -108,7 +112,8 @@ def _load_pv_power_watts_and_capacity_watt_power(
108112
pv_capacity_watt_power.index = [np.int32(col) for col in pv_capacity_watt_power.index]
109113
pv_power_watts.columns = pv_power_watts.columns.astype(np.int64)
110114

111-
# Create pv_system_row_number. We use the index of `pv_capacity_watt_power` because that includes
115+
# Create pv_system_row_number. We use the index of
116+
# `pv_capacity_watt_power` because that includes
112117
# the PV system IDs for the entire dataset (independent of `start_date` and `end_date`).
113118
# We use `float32` for the ID because we use NaN to indicate a missing PV system,
114119
# or that this whole example doesn't include PV.
@@ -134,7 +139,9 @@ def _load_pv_power_watts_and_capacity_watt_power(
134139

135140
# Drop any PV systems whose PV capacity is too low:
136141
PV_CAPACITY_THRESHOLD_W = 100
137-
pv_systems_to_drop = pv_capacity_watt_power.index[pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
142+
pv_systems_to_drop = pv_capacity_watt_power.index[
143+
pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W
144+
]
138145
pv_systems_to_drop = pv_systems_to_drop.intersection(pv_power_watts.columns)
139146
_log.info(
140147
f"Dropping {len(pv_systems_to_drop)} PV systems because their max power is less than"
@@ -164,53 +171,6 @@ def _load_pv_power_watts_and_capacity_watt_power(
164171
return pv_power_watts, pv_capacity_watt_power, pv_system_row_number
165172

166173

167-
"""Filtering to be added in a different IterDataPipe
168-
169-
pv_power_watts = pv_power_watts.clip(lower=0, upper=5e7)
170-
# Convert the pv_system_id column names from strings to ints:
171-
pv_power_watts.columns = [np.int32(col) for col in pv_power_watts.columns]
172-
173-
if "passiv" not in filename:
174-
_log.warning("Converting timezone. ARE YOU SURE THAT'S WHAT YOU WANT TO DO?")
175-
pv_power_watts = (
176-
pv_power_watts.tz_localize("Europe/London").tz_convert("UTC").tz_convert(None)
177-
)
178-
179-
pv_power_watts = _drop_pv_systems_which_produce_overnight(pv_power_watts)
180-
181-
# Resample to 5-minutely and interpolate up to 15 minutes ahead.
182-
# TODO: Issue #74: Give users the option to NOT resample (because Perceiver IO
183-
# doesn't need all the data to be perfectly aligned).
184-
pv_power_watts = pv_power_watts.resample("5T").interpolate(method="time", limit=3)
185-
pv_power_watts.dropna(axis="index", how="all", inplace=True)
186-
pv_power_watts.dropna(axis="columns", how="all", inplace=True)
187-
188-
# Drop any PV systems whose PV capacity is too low:
189-
PV_CAPACITY_THRESHOLD_W = 100
190-
pv_systems_to_drop = pv_capacity_watt_power.index[pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
191-
pv_systems_to_drop = pv_systems_to_drop.intersection(pv_power_watts.columns)
192-
_log.info(
193-
f"Dropping {len(pv_systems_to_drop)} PV systems because their max power is less than"
194-
f" {PV_CAPACITY_THRESHOLD_W}"
195-
)
196-
pv_power_watts.drop(columns=pv_systems_to_drop, inplace=True)
197-
198-
# Ensure that capacity and pv_system_row_num use the same PV system IDs as the power DF:
199-
pv_system_ids = pv_power_watts.columns
200-
pv_capacity_watt_power = pv_capacity_watt_power.loc[pv_system_ids]
201-
pv_system_row_number = pv_system_row_number.loc[pv_system_ids]
202-
203-
_log.info(
204-
"After filtering & resampling to 5 minutes:"
205-
f" pv_power = {pv_power_watts.values.nbytes / 1e6:,.1f} MBytes."
206-
f" {len(pv_power_watts)} PV power datetimes."
207-
f" {len(pv_power_watts.columns)} PV power PV system IDs."
208-
)
209-
210-
211-
"""
212-
213-
214174
# Adapted from nowcasting_dataset.data_sources.pv.pv_data_source
215175
def _load_pv_metadata(filename: str) -> pd.DataFrame:
216176
"""Return pd.DataFrame of PV metadata.

ocf_datapipes/load/pv/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ def put_pv_data_into_an_xr_dataarray(
3535
ints), and the index is UTC datetime.
3636
x_osgb: The x location. Index = PV system ID ints.
3737
y_osgb: The y location. Index = PV system ID ints.
38-
capacity_watt_power: The max power output of each PV system in Watts. Index = PV system ID ints.
38+
capacity_watt_power: The max power output of each PV system in Watts.
39+
Index = PV system ID ints.
3940
pv_system_row_number: The integer position of the PV system in the metadata.
4041
Used to create the PV system ID embedding.
4142
"""

ocf_datapipes/production/power_perceiver.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ def power_perceiver_production_datapipe(configuration_filename: Union[Path, str]
111111
)
112112
.fork(2)
113113
)
114-
topo_datapipe = topo_datapipe.reproject_topography().normalize(calculate_mean_std_from_example=True)
114+
topo_datapipe = topo_datapipe.reproject_topography().normalize(
115+
calculate_mean_std_from_example=True
116+
)
115117
sat_hrv_datapipe, sat_t0_datapipe = (
116118
sat_hrv_datapipe.convert_satellite_to_int8()
117119
.add_t0_idx_and_sample_period_duration(

ocf_datapipes/select/select_pv_systems_on_capacity.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ def __iter__(self) -> Union[xr.DataArray, xr.Dataset]:
4040
4141
# Drop any PV systems whose PV capacity is too low:
4242
PV_CAPACITY_THRESHOLD_W = 100
43-
pv_systems_to_drop = pv_capacity_watt_power.index[pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
43+
pv_systems_to_drop = pv_capacity_watt_power.index[
44+
pv_capacity_watt_power <= PV_CAPACITY_THRESHOLD_W]
4445
pv_systems_to_drop = pv_systems_to_drop.intersection(pv_power_watts.columns)
4546
_log.info(
4647
f"Dropping {len(pv_systems_to_drop)} PV systems because their max power is less than"

tests/batch/test_merge_numpy_modalities.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,17 @@ def test_merge_modalities(sat_hrv_datapipe, nwp_datapipe, gsp_datapipe, passiv_d
1414
batch_size = 4
1515

1616
sat_hrv_datapipe = AddT0IdxAndSamplePeriodDuration(
17-
sat_hrv_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(hours=1)
17+
sat_hrv_datapipe,
18+
sample_period_duration=timedelta(minutes=5),
19+
history_duration=timedelta(hours=1),
1820
)
1921
sat_hrv_datapipe = ConvertSatelliteToNumpyBatch(sat_hrv_datapipe, is_hrv=True)
2022
sat_hrv_datapipe = MergeNumpyExamplesToBatch(sat_hrv_datapipe, n_examples_per_batch=batch_size)
2123

2224
nwp_datapipe = AddT0IdxAndSamplePeriodDuration(
23-
nwp_datapipe, sample_period_duration=timedelta(minutes=30), history_duration=timedelta(hours=1)
25+
nwp_datapipe,
26+
sample_period_duration=timedelta(minutes=30),
27+
history_duration=timedelta(hours=1),
2428
)
2529
nwp_datapipe = ConvertNWPToNumpyBatch(nwp_datapipe)
2630
nwp_datapipe = MergeNumpyExamplesToBatch(nwp_datapipe, n_examples_per_batch=batch_size)
@@ -32,7 +36,9 @@ def test_merge_modalities(sat_hrv_datapipe, nwp_datapipe, gsp_datapipe, passiv_d
3236
gsp_datapipe = MergeNumpyExamplesToBatch(gsp_datapipe, n_examples_per_batch=batch_size)
3337

3438
passiv_datapipe = AddT0IdxAndSamplePeriodDuration(
35-
passiv_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(hours=1)
39+
passiv_datapipe,
40+
sample_period_duration=timedelta(minutes=5),
41+
history_duration=timedelta(hours=1),
3642
)
3743
passiv_datapipe = ConvertPVToNumpyBatch(passiv_datapipe)
3844
passiv_datapipe = MergeNumpyExamplesToBatch(passiv_datapipe, n_examples_per_batch=batch_size)

tests/convert/test_gsp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
def test_convert_gsp_to_numpy_batch(gsp_datapipe):
99
gsp_datapipe = AddT0IdxAndSamplePeriodDuration(
10-
gsp_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
10+
gsp_datapipe,
11+
sample_period_duration=timedelta(minutes=5),
12+
history_duration=timedelta(minutes=60),
1113
)
1214
gsp_datapipe = ConvertGSPToNumpyBatch(gsp_datapipe)
1315
data = next(iter(gsp_datapipe))

tests/convert/test_nwp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
def test_convert_nwp_to_numpy_batch(nwp_datapipe):
1010
nwp_datapipe = AddT0IdxAndSamplePeriodDuration(
11-
nwp_datapipe, sample_period_duration=timedelta(minutes=60), history_duration=timedelta(minutes=60)
11+
nwp_datapipe,
12+
sample_period_duration=timedelta(minutes=60),
13+
history_duration=timedelta(minutes=60),
1214
)
1315
t0_datapipe = SelectLiveT0Time(nwp_datapipe, dim_name="init_time_utc")
1416

tests/convert/test_satellite.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
def test_convert_satellite_to_numpy_batch(sat_datapipe):
99

1010
sat_datapipe = AddT0IdxAndSamplePeriodDuration(
11-
sat_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
11+
sat_datapipe,
12+
sample_period_duration=timedelta(minutes=5),
13+
history_duration=timedelta(minutes=60),
1214
)
1315
sat_datapipe = ConvertSatelliteToNumpyBatch(sat_datapipe, is_hrv=False)
1416
data = next(iter(sat_datapipe))
@@ -20,7 +22,9 @@ def test_convert_satellite_to_numpy_batch(sat_datapipe):
2022

2123
def test_convert_hrvsatellite_to_numpy_batch(sat_datapipe):
2224
sat_datapipe = AddT0IdxAndSamplePeriodDuration(
23-
sat_datapipe, sample_period_duration=timedelta(minutes=5), history_duration=timedelta(minutes=60)
25+
sat_datapipe,
26+
sample_period_duration=timedelta(minutes=5),
27+
history_duration=timedelta(minutes=60),
2428
)
2529
sat_datapipe = ConvertSatelliteToNumpyBatch(sat_datapipe, is_hrv=True)
2630
data = next(iter(sat_datapipe))

0 commit comments

Comments
 (0)