Skip to content

Commit

Permalink
fix netcdf with quarter hourly index (#220)
Browse files Browse the repository at this point in the history
  • Loading branch information
danangmassandy authored Oct 24, 2024
1 parent 3414425 commit a94d5c5
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 14 deletions.
18 changes: 18 additions & 0 deletions django_project/gap/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,24 @@ class DatasetTimeStep:
DAILY = 'DAILY'
OTHER = 'OTHER'

@classmethod
def to_freq(cls, time_step: str) -> str:
    """Translate a dataset time step into a pandas frequency alias.

    :param time_step: One of DatasetTimeStep
    :type time_step: str
    :raises ValueError: If no frequency is defined for the time step
    :return: pandas frequency alias ('D', 'h' or '15min')
    :rtype: str
    """
    # Ordered (time step, frequency) pairs; matched with ``==`` so that
    # any value that is not one of the supported steps falls through to
    # the ValueError below.
    supported = (
        (DatasetTimeStep.DAILY, 'D'),
        (DatasetTimeStep.HOURLY, 'h'),
        (DatasetTimeStep.QUARTER_HOURLY, '15min'),
    )
    for step, freq in supported:
        if time_step == step:
            return freq
    raise ValueError(f'Unsupported time_step {time_step}')


class DatasetObservationType:
"""Observation type of data source."""
Expand Down
32 changes: 27 additions & 5 deletions django_project/gap/providers/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,35 @@ def to_csv_stream(self, suffix='.csv', separator=','):
# write row
yield bytes(','.join(data) + '\n', 'utf-8')

def _get_date_array(self):
    """Build the datetime index spanning the result values.

    The range runs from the datetime of the first value to the datetime
    of the last value, stepping by the frequency of the dataset of the
    first attribute. For DAILY datasets both bounds are reduced to plain
    dates before the range is generated.

    :return: Range of dates/datetimes covering the values
    :rtype: pd.DatetimeIndex
    """
    source = self.attributes[0].dataset

    # NOTE(review): relies on self.values being ordered by datetime so
    # that the first and last entries are the range bounds - confirm.
    start = self.values[0].get_datetime()
    end = self.values[-1].get_datetime()

    if source.time_step == DatasetTimeStep.DAILY:
        start, end = start.date(), end.date()

    frequency = DatasetTimeStep.to_freq(source.time_step)
    return pd.date_range(start=start, end=end, freq=frequency)

def _get_date_index(
        self, date_array: pd.DatetimeIndex, datetime: datetime):
    """Get the location of a datetime inside date_array.

    For DAILY datasets the value is truncated to a timezone-naive
    midnight before the lookup, because the daily index produced by
    ``_get_date_array`` is generated from plain dates. Any other time
    step is looked up unchanged.

    :param date_array: Index to search in
    :type date_array: pd.DatetimeIndex
    :param datetime: Datetime of the value to locate
    :type datetime: datetime
    :raises KeyError: If the (possibly truncated) datetime is not
        present in date_array
    :return: Location as returned by ``DatetimeIndex.get_loc``
    """
    dataset = self.attributes[0].dataset
    if dataset.time_step != DatasetTimeStep.DAILY:
        return date_array.get_loc(datetime)

    # Zero the microsecond too: a value carrying sub-second precision
    # would otherwise never match the midnight entries of the index.
    day_start = datetime.replace(
        hour=0, minute=0, second=0, microsecond=0, tzinfo=None)
    return date_array.get_loc(day_start)

def to_netcdf_stream(self):
"""Generate NetCDF."""
# create date array
date_array = pd.date_range(
self.start_date.date().isoformat(),
self.end_date.date().isoformat()
)
date_array = self._get_date_array()

# sort lat and lon array
lat_array = set()
Expand Down Expand Up @@ -173,7 +195,7 @@ def to_netcdf_stream(self):

# assign values to the dataset
for val in self.values:
date_idx = date_array.get_loc(val.get_datetime_repr('%Y-%m-%d'))
date_idx = self._get_date_index(date_array, val.get_datetime())
loc = val.location
lat_idx = lat_array.get_loc(round(loc.y, 5))
lon_idx = lon_array.get_loc(round(loc.x, 5))
Expand Down
19 changes: 18 additions & 1 deletion django_project/gap/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
Country,
Measurement,
CollectorSession,
IngestorType
IngestorType,
DatasetTimeStep
)


Expand Down Expand Up @@ -169,3 +170,19 @@ def test_collector_session(self):
session.delete()
self.assertFalse(
CollectorSession.objects.filter(id=session.id).exists())


class DatasetTimeStepTest(TestCase):
    """Tests for the DatasetTimeStep helpers."""

    def test_convert_freq(self):
        """Check to_freq for supported and unsupported time steps."""
        # Supported time steps and the frequency each must map to.
        expected_freqs = (
            (DatasetTimeStep.DAILY, 'D'),
            (DatasetTimeStep.HOURLY, 'h'),
            (DatasetTimeStep.QUARTER_HOURLY, '15min'),
        )
        for time_step, expected in expected_freqs:
            self.assertEqual(DatasetTimeStep.to_freq(time_step), expected)

        # Anything else must be rejected with a descriptive error.
        with self.assertRaises(ValueError) as ctx:
            DatasetTimeStep.to_freq(DatasetTimeStep.OTHER)
        self.assertIn('Unsupported time_step', str(ctx.exception))
22 changes: 15 additions & 7 deletions django_project/gap/utils/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,11 @@ def _datetime_as_str(self):
self.datetime, unit='s', timezone='UTC')
return self.datetime.isoformat(timespec='seconds')

def get_datetime_repr(self, format: str) -> str:
"""Return the representation of datetime in given format.
def get_datetime(self) -> datetime:
"""Get datetime value.
:param format: Format like '%Y-%m-%d'
:type format: str
:return: String of datetime
:rtype: str
:return: parsed datetime
:rtype: datetime
"""
dt = self.datetime
if isinstance(self.datetime, np.datetime64):
Expand All @@ -220,7 +218,17 @@ def get_datetime_repr(self, format: str) -> str:
np.timedelta64(1, 's')
)
dt = datetime.fromtimestamp(timestamp, tz=pytz.UTC)
return dt.strftime(format)
return dt

def get_datetime_repr(self, format: str) -> str:
    """Format the datetime of this value as a string.

    :param format: strftime pattern, e.g. '%Y-%m-%d'
    :type format: str
    :return: Formatted datetime string
    :rtype: str
    """
    dt = self.get_datetime()
    return dt.strftime(format)

def to_dict(self):
"""Convert into dict.
Expand Down
4 changes: 3 additions & 1 deletion django_project/gap_api/api_views/measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,9 @@ def get_response_data(self) -> Response:
data={}
)

dataset_attributes = DatasetAttribute.objects.filter(
dataset_attributes = DatasetAttribute.objects.select_related(
'dataset'
).filter(
attribute__in=attributes,
dataset__is_internal_use=False,
attribute__is_active=True
Expand Down

0 comments on commit a94d5c5

Please sign in to comment.