# # # functions for bias correcting ICAR 2D CMIP runs # # # # #
# - generic version, no regridding. Takes timestep (1- or 3-hourly) input
#   data and uses daily obs data to quantile map.
#
# TODO: document the timestep the reference data is expected to have.
#
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
import time
# from datetime import datetime
# import glob, os
import xarray as xr
import dask
import numpy as np
from random import randrange
import gc
import sys
# sys.path.append('/glade/u/home/bkruyt/libraries/storylines/storylines')
sys.path.append('/glade/u/home/bkruyt/libraries')
sys.path.append('/glade/u/home/bkruyt/libraries/st_lines')
# import icar2gmet as i2g
# from storylines.tools import quantile_mapping
### If storylines gives an error, use the py3_yifan kernel / conda env
# alternatively ../Example_scripts/2D_quantile_map/quantile_mapping.py
import quantile_mapping # from same folder.
# verbose=True # print more details on runtime
noise_path = "/glade/derecho/scratch/bkruyt/CMIP6"
NOISE_u = xr.open_dataset(f"{noise_path}/uniform_noise_480_480.nc")
noise_val = 0.01
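# NOISE_u holds precomputed uniform noise on the 480x480 grid (cf. the shape
# comment in add_noise below); noise_val caps the amplitude of the noise
# added to precipitation fields before quantile mapping.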
######################################################
############ FUNCTIONS ###############
######################################################
def add_noise(df, noise_val=noise_val, random=True):
    """Add uniform noise to a (time, y, x) DataArray df.

    Zero values are replaced by small random noise (< noise_val) to break
    ties; positive values are shifted up by noise_val so they stay above
    the noise floor.
    """
    with dask.config.set(**{'array.slicing.split_large_chunks': True}):
        u_noise = NOISE_u.uniform_noise  # .load() # 55000 x 480 x 480
        # Add noise to the (daily) input data. To avoid taking the same
        # noise values every time, we offset the time index by a random int:
        t = len(df.time)
        r = randrange(1000) if random else 0
        noise_arr = noise_val * u_noise[r:(r + t), :df.shape[1], :df.shape[2]]
        df = xr.where(df > 0, df + noise_val, noise_arr.values)
    return df
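# A minimal sketch of how add_noise is typically called (the array below is
# made up for illustration; only the 480x480 grid matches the noise file):
#
#   daily = xr.DataArray(np.random.gamma(0.5, 2.0, size=(10, 480, 480)),
#                        dims=("time", "lat_y", "lon_x"))
#   daily_n = add_noise(daily, noise_val=0.01, random=True)
#   # zeros become small positive values < 0.01, wet values shift up by 0.01,
#   # so the ranking of wet days is preserved for the quantile mapping below.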
######################################################
############### PRECIPITATION ##################
######################################################
def correct_precip(this_ds, dsObs, dsRef, bc_by_month=False, noise=True, verbose=False):
    """this_ds and dsRef: datasets with the same precipitation variable;
    dsObs is a DataArray with observations ('data to match')."""
t0 = time.time()
if bc_by_month:
print(" - - - - - - - - Bias-correcting precipitation by month - - - - - - - - - ")
else:
print(" - - - - - - - - Bias-correcting precipitation by year - - - - - - - - - ")
    # determine timestep:
    tdelta_int = int(this_ds.time.dt.hour[1] - this_ds.time.dt.hour[0])
    if tdelta_int == 0:
        tdelta_int = int(this_ds.time.dt.hour[2] - this_ds.time.dt.hour[1])
    if tdelta_int == 0:
        tdelta_int = 24
        print(" tdelta_int=0 ; setting tdelta_int=24; daily timestep")
    print(f" timestep of input data is {tdelta_int} hrs")
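    # (e.g. hourly stamps 0, 3, 6, ... give tdelta_int = 3; daily stamps at
    #  midnight give two zero diffs and fall through to tdelta_int = 24)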
    # determine the name of the time-step precipitation variable: (could be made a function)
    if 'precip_hr' in this_ds.data_vars:
        pcp_var = 'precip_hr'
    elif 'precip_dt' in this_ds.data_vars:
        pcp_var = 'precip_dt'
    elif 'Prec' in this_ds.data_vars:
        pcp_var = 'Prec'
    elif 'precipitation' in this_ds.data_vars:
        pcp_var = 'precipitation'
    else:
        sys.exit(" could not determine the time-step precipitation variable; stopping.")
    t0 = time.time()
    print(" loading data")
    da_pcp = this_ds[pcp_var]
print(" ", time.time() - t0)
    if verbose:
        print(f" precipitation variable is {pcp_var}")
        try:
            print(f" max input precipitation value is {np.nanmax(da_pcp.values)} kg m-2")
            print(f" max obs. precipitation value is {np.nanmax(dsObs.values)} {dsObs.units}")
        except AttributeError:  # dsObs may lack a units attribute
            pass
t0 = time.time()
if tdelta_int<24:
print(" making daily precip")
daily = da_pcp.resample(time='1D').sum(dim='time').load()
else:
daily = da_pcp.load()
print(" ", time.time() - t0)
#______________ add noise _____________
t0 = time.time()
# Add noise to (daily) input and Reference data:
if noise:
print(f" adding noise to input and ref data with max val {noise_val}")
daily_n = add_noise(daily, noise_val=noise_val, random=True)
dsRef_n = add_noise(dsRef, noise_val=noise_val, random=True)
else:
daily_n = daily
dsRef_n = dsRef
print(" ! ! ! NOT adding noise to input and ref data ! ! !")
print(" ", time.time() - t0)
# _____________ Do the bias correction _____________
t0 = time.time()
print(" quantile mapping")
    # NB: the bc procedure introduces (very small) negative values when
    # extrapolate='1to1' is set (this does not happen when dsRef=None).
    # To prevent this we set extrapolate to "max". (2024-04-18)
if verbose:
print(f" daily_n shape: {daily_n.shape}")
print(f" dsRef_n shape: {dsRef_n.shape}")
print(f" dsObs shape: {dsObs.shape}")
    if bc_by_month:
        # signature: quantile_mapping_by_group(input_data, ref_data, data_to_match, ...)
        # NB: ref_data should exclude the 5y input period; use_ref_data is not
        # actually used by the routine - set ref_data=None to exclude ref data.
        bc_daily = quantile_mapping.quantile_mapping_by_group(
            daily_n, dsRef_n, dsObs,  # correct to liv grid
            grouper='time.month', detrend=False, use_ref_data=True,
            extrapolate="max", n_endpoints=50
        )
    else:
        # signature: quantile_mapping(input_data, ref_data, data_to_match, ...)
        bc_daily = quantile_mapping.quantile_mapping(
            daily_n, dsRef_n, dsObs,
            detrend=False, use_ref_data=True,
            extrapolate="max", n_endpoints=50
        )
print(" ", time.time() - t0)
if np.count_nonzero(bc_daily.values<0) >0:
print(f"\n ! ! ! {np.count_nonzero(daily_n.values<0)} negative precip values before bc ! ! ! ")
print(f" ! ! ! {np.count_nonzero(bc_daily.values<0)} negative precip values after bc ! ! ! ")
print(f" ! ! ! {np.nanmin(bc_daily.values)} minimum precip value after bc ! ! ! \n")
t0 = time.time()
if verbose:
# print(" da_pcp.isnull():",np.sum(da_pcp.isnull().values))
print(" da_pcp.shape:",da_pcp.shape)
print(" da_pcp == 0 :",np.sum(da_pcp.values==0))
    #_______ apply correction to n-hourly data: ______
    print(" applying correction to ", tdelta_int, "-hourly data")
    # Calculate the daily correction array:
    correction = bc_daily.values / daily_n.values
    # repeat each daily factor for the nr of timesteps per day
    # (np.repeat requires an integer repeat count):
    correction = np.repeat(correction, int(24 / tdelta_int), axis=0)
    if np.count_nonzero(correction < 0) > 0:
        print(f"\n ! ! ! {np.count_nonzero(correction<0)} negative correction values ! ! ! \n")
    # Create a mask for elements where the daily sum is zero but bc_daily is positive:
    mask = (daily == 0) & (bc_daily > 0)
    mask = np.repeat(mask.values, int(24 / tdelta_int), axis=0)
    #__ Set values to scaled bc_daily where daily sum is zero and bc_daily is positive __
    # method='zero': when resampled to daily again, the values 'line up':
    bc_tdelta = bc_daily.interp(time=da_pcp.time, method='zero') / (24 / tdelta_int)
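    # (e.g. with 3-hourly input there are 8 steps per day: a daily factor of
    #  1.2 becomes eight consecutive 1.2 entries, and bc_tdelta spreads each
    #  daily bc total evenly over those 8 steps, so resampling bc_tdelta back
    #  to daily sums recovers bc_daily.)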
if verbose:
print(f" mask shape: {mask.shape}")
print(f" correction shape: {correction.shape} ") # \n
print(f" bc_tdelta shape: {bc_tdelta.shape}")
    # ______ Apply correction to (all) values ______
    # (subset to catch the possibility of the last year being incomplete)
    da_pcp *= correction[:len(da_pcp.time)]
    # except where the original daily value was 0; there we set the scaled bc_daily value:
    da_pcp = da_pcp.where(~mask[:len(da_pcp.time)], bc_tdelta)
    this_ds[pcp_var] = da_pcp  # write back: .where() returns a new object
if verbose:
        print(" da_pcp.shape:", da_pcp.shape)
print(" da_pcp == 0 :",np.sum(da_pcp.values==0))
print(" da_pcp.isnull():",np.sum(da_pcp.isnull().values))
print(" ", time.time() - t0)
return this_ds
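# NOTE: correct_precip mutates this_ds in place and also returns it. A minimal
# (hypothetical) call, with obs/ref prepared as daily precipitation fields on
# the same grid:
#
#   ds_bc = correct_precip(ds_icar, dsObs=obs_prcp, dsRef=ref_prcp,
#                          bc_by_month=True, noise=True, verbose=False)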
######################################################
############### TEMPERATURE ####################
######################################################
def correct_temperature( this_ds, dsObs_tmin, dsObs_tmax, dsRef_tmin, dsRef_tmax, bc_by_month=False):
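    """Bias-correct temperature: quantile-map daily tmin/tmax (derived from
    ta2m if needed) against dsObs_tmin/dsObs_tmax, then rescale the sub-daily
    ta2m cycle to the corrected diurnal range."""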
if bc_by_month:
print(" - - - - - - - - Bias-correcting temperature by month - - - - - - - - - ")
else:
print(" - - - - - - - - Bias-correcting temperature by year - - - - - - - - - ")
    # determine time step:
    tdelta_int = int(this_ds.time.dt.hour[1] - this_ds.time.dt.hour[0])
    if tdelta_int == 0:
        tdelta_int = int(this_ds.time.dt.hour[2] - this_ds.time.dt.hour[1])
    mean_hour = np.mean(this_ds.time.dt.hour)  # diagnostic: mean timestamp hour, not a timestep
    print(f" mean timestamp hour: {np.round(mean_hour, 2)}")
    if tdelta_int == 0:
        tdelta_int = 24
        print(" tdelta_int=0 ; setting tdelta_int=24; daily timestep")
    print(f" timestep of input data is {tdelta_int} hrs")
t0 = time.time()
print(" loading temperature")
if 'ta2m' in this_ds.data_vars:
ta2m = this_ds["ta2m"].load()
if np.isnan(ta2m).any():
print('ta2m has nans! ')
print(" making daily tmin/tmax values from ta2m")
tmin = ta2m.resample(time='1D').min(dim='time').load()
tmax = ta2m.resample(time='1D').max(dim='time').load()
if np.isnan(tmin).any():
print('tmin has nans! ')
if np.isnan(tmax).any():
print('tmax has nans! ')
    elif 'Tmin' in this_ds.data_vars:
print(" found tmin/tmax values ")
tmin = this_ds["Tmin"].load()
if np.isnan(tmin).any():
print('tmin has nans! ')
tmax = this_ds["Tmax"].load()
if np.isnan(tmax).any():
print('tmax has nans! ')
    else:
        print(" Unclear which temperature variable to bias-correct. Stopping.")
        sys.exit()
    # dsRef_tmin/tmax and dsObs_tmin/tmax should already be loaded
# if verbose:
print(f" min dsRef_tmin: {np.nanmin(dsRef_tmin.values)} ")
print(f" min dsObs_tmin: {np.nanmin(dsObs_tmin.values)} ")
print(f" min tmin before bc: {np.nanmin(tmin.values)} ")
print(" ", np.round(time.time() - t0, 2))
# - - - - QMAP - - - -
t0 = time.time()
    if bc_by_month:  # quantile_mapping_by_group(input_data, ref_data, data_to_match, grouper='time.month', **kwargs)
        print(" quantile mapping t_min")
        bc_tmin = quantile_mapping.quantile_mapping_by_group(
            tmin, dsRef_tmin, dsObs_tmin, detrend=False, extrapolate="1to1", grouper='time.month', n_endpoints=50
        )
        del dsObs_tmin, dsRef_tmin
        print(" quantile mapping t_max")
        bc_tmax = quantile_mapping.quantile_mapping_by_group(
            tmax, dsRef_tmax, dsObs_tmax, detrend=False, extrapolate="1to1", grouper='time.month', n_endpoints=50
        )
        del dsObs_tmax  # dsRef_tmax is kept: it is needed for the domain mask below
    else:
        print(" quantile mapping t_min")
        bc_tmin = quantile_mapping.quantile_mapping(
            tmin, dsRef_tmin, dsObs_tmin, detrend=False, extrapolate="1to1", n_endpoints=50
        )
        del dsObs_tmin, dsRef_tmin
        print(" quantile mapping t_max")
        bc_tmax = quantile_mapping.quantile_mapping(
            tmax, dsRef_tmax, dsObs_tmax, detrend=False, extrapolate="1to1", n_endpoints=50
        )
        del dsObs_tmax  # dsRef_tmax is kept: it is needed for the domain mask below
print(" ", np.round(time.time() - t0, 2))
if np.isnan(bc_tmin).any():
print('bc_tmin has nans! ')
if np.isnan(bc_tmax).any():
print('bc_tmax has nans! ')
    # make mask to crop ta2m with after bc (dsRef_tmax may be a DataArray or a Dataset):
    try:
        Tmask = dsRef_tmax.isel(time=1).values > 0
    except AttributeError:
        try:
            Tmask = dsRef_tmax.isel(time=1).ta2m.values > 0
        except AttributeError:
            Tmask = dsRef_tmax.isel(time=1).Tmax.values > 0
    # apply Tmask to mask t values outside of the domain:
    try:
        bc_tmin.values = bc_tmin.where(Tmask)
        bc_tmax.values = bc_tmax.where(Tmask)
        print(" masking Tmin/max values outside of domain...\n")
    except Exception:
        print(" could not mask Tmin/max values outside of domain ! ! !\n")
# --------- check for extremely low temps -----
print(f" min input ta2m: {np.nanmin(ta2m.values )} (K)")
print(f" min daily bc tmin: {np.nanmin(bc_tmin.values )} (C)")
print(f" max daily bc tmax: {np.nanmax(bc_tmax.values )} (C)")
    # - - - - - apply correction to n-hr data - - - -
    t0 = time.time()
    print(" applying correction to " + str(tdelta_int) + "-hourly data")
    if 'ta2m' in this_ds.data_vars:
        steps_per_day = 24 // tdelta_int
        for i in range(len(this_ds.time)):
            t = i // steps_per_day
            #### NOTE: if there are missing timesteps this goes terribly wrong! ####
            dtr = (tmax[t] - tmin[t]).values  # daily temperature range
            dtr[dtr < 0.001] = 0.001          # avoid division by (near-)zero
            this_ds["ta2m"][i] -= tmin[t]
            this_ds["ta2m"][i] /= dtr
            this_ds["ta2m"][i] *= np.abs(bc_tmax[t] - bc_tmin[t])
            this_ds["ta2m"][i] += bc_tmin[t] + 273.15
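            # i.e. per timestep: ta2m_bc = bc_tmin + 273.15
            #      + (ta2m - tmin) / (tmax - tmin) * |bc_tmax - bc_tmin|
            # which maps the sub-daily cycle onto the corrected diurnal range.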
elif 'Tmin' in this_ds.data_vars:
if tdelta_int==24:
print(' Tmin min before:', np.nanmin(this_ds['Tmin'].values))
print(' Tmax max before:', np.nanmax(this_ds['Tmax'].values))
this_ds['Tmin'].values = bc_tmin
this_ds['Tmax'].values = bc_tmax
print(' Tmin min after bc:', np.nanmin(this_ds['Tmin'].values))
print(' Tmax max after bc:', np.nanmax(this_ds['Tmax'].values))
else:
print(f"\n ! ! ! tdelta_int= {tdelta_int} but no ta2m in data_vars. Stopping. ! ! !")
sys.exit()
    if 'ta2m' in this_ds.data_vars:
        print(f" min output ta2m: {np.nanmin(ta2m.values)}")
# cleanup
gc.collect()
print(" ", np.round(time.time() - t0, 2))
return this_ds
# return ta2m
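######################################################
############       EXAMPLE USAGE        #############
######################################################
# A minimal, hypothetical driver sketch (not part of the original workflow):
# all file paths and variable names below are placeholders - substitute the
# actual ICAR/CMIP, reference, and observation files for a real run.
if __name__ == "__main__":
    # 5-year chunk of (1- or 3-hourly) model output to be corrected:
    ds_icar = xr.open_dataset("icar_cmip_5y_chunk.nc")  # placeholder path
    # daily observations ('data to match') and model reference data:
    obs = xr.open_dataset("obs_daily.nc")               # placeholder path
    ref = xr.open_dataset("ref_daily.nc")               # placeholder path

    ds_icar = correct_precip(
        ds_icar, dsObs=obs["prcp"], dsRef=ref["prcp"],
        bc_by_month=True, noise=True, verbose=True,
    )
    ds_icar = correct_temperature(
        ds_icar,
        dsObs_tmin=obs["tmin"], dsObs_tmax=obs["tmax"],
        dsRef_tmin=ref["tmin"], dsRef_tmax=ref["tmax"],
        bc_by_month=True,
    )
    ds_icar.to_netcdf("icar_cmip_5y_chunk_bc.nc")       # placeholder path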