utils.py
import xarray as xr
import numpy as np
import pandas as pd
import pytz
from datetime import datetime, timedelta
from typing import List, Dict, Callable


def epoch_time_to_datetime64(ds: xr.Dataset, old_time_label: str) -> xr.Dataset:
    """Convert a time coordinate given as days since 1970-01-01 into datetime64 values."""
    new_time = datetime(1970, 1, 1) + pd.to_timedelta(pd.Series(ds[old_time_label].data), unit='D')
    # Alternative: build timezone-aware datetimes in the Finnish timezone instead
    # helsinki_tz = pytz.timezone('Europe/Helsinki')
    # new_time = np.array([datetime.fromtimestamp(int(t*24*60*60), tz=helsinki_tz) for t in ds[old_time_label]])
    ds[old_time_label] = new_time
    return ds


def datetime64_to_timestamp(ds: xr.Dataset, old_time_label: str) -> xr.Dataset:
    """Localize naive datetime64 values to the Europe/Helsinki timezone."""
    helsinki_tz = pytz.timezone('Europe/Helsinki')
    new_time = np.array([pd.Timestamp(t).tz_localize(helsinki_tz, ambiguous=True, nonexistent='shift_forward')
                         for t in ds[old_time_label].values])
    ds[old_time_label] = new_time
    return ds


def df_to_dataset(dataframes: List[pd.DataFrame], concat_dim=None, renamer: Dict = None) -> xr.Dataset:
    """Convert a list of DataFrames into one Dataset, optionally concatenating and renaming dimensions."""
    ds = xr.Dataset.from_dataframe(dataframes[0])
    if len(dataframes) > 1 and concat_dim is not None:
        for df in dataframes[1:]:
            ds = xr.concat((ds, xr.Dataset.from_dataframe(df)), dim=concat_dim)
    if renamer is not None:
        ds = ds.rename(renamer)
    return ds


def load_dataset(path: Callable, years: List[int], concat_dim=None) -> xr.Dataset:
    """Open one NetCDF file per year and concatenate the results along concat_dim."""
    dataset = xr.open_dataset(path(years[0]))
    if len(years) > 1 and concat_dim is not None:
        for year in years[1:]:
            dataset = xr.concat((dataset, xr.open_dataset(path(year))), dim=concat_dim)
    return dataset


def load_csv(path: Callable, years: List[int]) -> List[pd.DataFrame]:
    """Read one CSV file per year, skipping the second header row and parsing the index as datetimes."""
    # Note: date_parser is deprecated in newer pandas; parse_dates=True is an alternative.
    dataframes = []
    for year in years:
        dataframes.append(pd.read_csv(path(year), skiprows=[1], index_col=0,
                                      date_parser=lambda x: pd.to_datetime(x)))
    return dataframes


def load_Ran_ec_data(onedrive_path: str, years: List[int]) -> xr.Dataset:
    """Load the FI-Ran EC flux files (one NetCDF per year) and convert their epoch time axis."""
    def path(x): return f'{onedrive_path}EC/EC_flux_L2_final_FI-Ran_{x}.nc'
    ec_data = load_dataset(path, years, concat_dim='time')
    ec_data = epoch_time_to_datetime64(ec_data, 'time')
    return ec_data


def load_Ran_biomet_data(onedrive_path: str, years: List[int]) -> xr.Dataset:
    """Load the FI-Ran biomet CSV files, rename the time dimension and mask missing values."""
    def path(x): return f'{onedrive_path}biomet/biomet_final_30min_FI-Ran_{x}.csv'
    csv_data = load_csv(path, years)
    biomet_data = df_to_dataset(csv_data, concat_dim='TIMESTAMP_1', renamer={'TIMESTAMP_1': 'time'})
    # Replace the -9999 missing-value flag with NaN
    biomet_data = biomet_data.where(biomet_data != -9999, np.nan)
    return biomet_data
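

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module): load the FI-Ran EC and
    # biomet datasets for a couple of years with the helpers above. The OneDrive root
    # and the year list are hypothetical placeholders; point them at your own data.
    onedrive_path = '/path/to/OneDrive/'  # hypothetical root containing the EC/ and biomet/ folders
    years = [2020, 2021]                  # hypothetical years to load

    ec_data = load_Ran_ec_data(onedrive_path, years)
    biomet_data = load_Ran_biomet_data(onedrive_path, years)

    # Both helpers return xarray Datasets with a 'time' dimension, so they can be
    # inspected or combined directly.
    print(ec_data)
    print(biomet_data)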