-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBC_Icar2Liv_5y_ta2m.py
523 lines (414 loc) · 25.7 KB
/
BC_Icar2Liv_5y_ta2m.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
#############################################################################
#
# Bias Correction of x-hourly ICAR data to the Livneh Grid.
#
# Temperature
#
# - corrects ta2m by calculating tmin, tmax, trange. Livneh Tmin & Tmax for Obs.
# - in 5 year blocks, leaves out the 5 years from ref data. (historical GCM-icar runs)
# - 2024-04-03: reads in 3hr ref data,makes daily Tmin/Tmax.
#
#
# Bert Kruyt, NCAR RAL, october 2023
#############################################################################
import argparse
import time
from datetime import datetime
import glob, os
import xarray as xr
import numpy as np
import multiprocessing as mp
import psutil
# the bc functions:
from bc_funcs_livgrid_5y import *
import sys
# sys.path.append('/glade/u/home/bkruyt/libraries/storylines/storylines')
sys.path.append('/glade/u/home/bkruyt/libraries')
sys.path.append('/glade/u/home/bkruyt/libraries/st_lines')
sys.path
import icar2gmet as i2g
# from storylines.tools import quantile_mapping # not used here but in bc_funcs.py
### If storylines gives an error , use py3_yifan kernel /conda env
# alternatively ../Example_scripts/2D_quantile_map/quantile_mapping.py
import warnings
warnings.simplefilter("ignore", category=Warning) #SerializationWarning ?
############################################
# Settings
############################################
bc_by_month=True
test = False # reduces datasets for faster processing, incorrect results!
calc_relhum = False # !! now done on icar grid before BC!! # add a relative humidity variable to the dataset, by calculating this from (corrected) ta2m and qv
dropvars=True # drop variables in vars_to_drop (below). Also writes to "3hr" subdir iso "3hr_ta2m" !
# the variables to remove:
vars_to_drop=["swe", "soil_water_content", "hfls", "hus2m",
"runoff_surface","runoff_subsurface",
"soil_column_total_water","soil_column_total_water",
"ivt","iwv","iwl","iwi", "snowfall_dt", "cu_precip_dt", "graupel_dt"]
ref_start = '1950-01-01' # the range of the reference dataset that overlaps with the obs. NOTE: Different for CMIP5!
# ref_end = '2014-12-31' # '2004-12-31' for CMIP5!
ref_end = '2018-12-31'
# ref_end = '1970-12-31' ## ! ! ! ! TEST
############################################
# functions
############################################
def process_command_line():
'''Parse the commandline'''
parser = argparse.ArgumentParser(description='Aggregate 1 day files to month(3h) and year (24h), while also fixing neg precip')
parser.add_argument('model', help='model')
parser.add_argument('scenario', help='scenario to process; of form sspXXX')
parser.add_argument('part', help='1 = 1950-2054; 2 = 2055-2099')
parser.add_argument('dt', help="time step of input ICAR data, either 'daily' or '3hr' ")
parser.add_argument('CMIP', help='CMIP5 or CMIP6')
return parser.parse_args()
## READ IN PCP bc files!!!
def get_icar_PCP_filelist(start_year, end_year, dt="3hr"):
"""returns a list of files (with full path) that fall within the period between start_year, end_year (strings %Y or int)"""
# print(start_year, end_year)
print(f' looking for input in: {base_in}/{model}_{scen}/{dt}_pcp/icar_*_*.nc')
files=[]
for y in range(int(start_year), int(end_year)+1) :
files.extend( glob.glob(f'{base_in}/{model}_{scen}/{dt}_pcp/icar_*_{y}*.nc') )
return files
def get_icar_ref_filelist(start_year, end_year, dt="3hr"):
"""returns a list of files (with full path) that fall within the period between start_year, end_year (strings %Y or int)"""
## When processing scenario scen, we want the full overlap of the reference period with obs, even thought that is partly in a scenario (technically from 2015 for CMIP6? 2005 CMIP5)
if scen[:4]=="hist" and CMIP=="CMIP6":
scen_load="ssp370"
elif scen[:4]=="hist" and CMIP=="CMIP5":
scen_load="rcp45"
else:
scen_load=scen
files=[]
for y in range(int(start_year), int(end_year)+1) :
if CMIP=="CMIP6":
files.extend( glob.glob(f'{ref_in}/{model}_hist/{dt}/icar_*_{y}*.nc') )
files.extend( glob.glob(f'{ref_in}/{model}_{scen_load}_2004/{dt}/icar_*_{y}*.nc') )
files.extend( glob.glob(f'{ref_in}/{model}_{scen_load}_2049/{dt}/icar_*_{y}*.nc') )
elif CMIP=="CMIP5":
files.extend( glob.glob(f'{ref_in}/{model}_historical/{dt}/icar_*_{y}*.nc') )
files.extend( glob.glob(f'{ref_in}/{model}_{scen_load}_2005_2050/{dt}/icar_*_{y}*.nc') )
files.extend( glob.glob(f'{ref_in}/{model}_{scen_load}_2050_2100/{dt}/icar_*_{y}*.nc') )
elif CMIP=="CESM2":
files.extend( glob.glob(f'{base_in}/{model}_{scen_load}/{dt}/icar_*_{y}*.nc') )
# sort files:
files=sorted(files)
if len(files)==0: print(f" No {dt} reference files found")
return files
def get_icar_filelist_CMIP5(start_year, end_year, dt="daily"): # Ryan has CMIP5 daily w/o cp, so use those for reference:
base_CMIP5 = "/glade/campaign/ral/hap/currierw/icar/output"
if scen[:4]=="hist" and CMIP=="CMIP5":
scen_load="rcp45"
else:
scen_load=scen
files=[]
for y in range(int(start_year), int(end_year)+1) :
if CMIP=="CMIP5":
files.extend( glob.glob(f'{base_CMIP5}/{model}_historical/{dt}/icar_*_{y}*.nc') )
files.extend( glob.glob(f'{base_CMIP5}/{model}_{scen_load}_2005_2050/{dt}/icar_*_{y}*.nc') )
files.extend( glob.glob(f'{base_CMIP5}/{model}_{scen_load}_2050_2100/{dt}/icar_*_{y}*.nc') )
err_path=f'{base_CMIP5}/{model}_{scen_load}_XXXX'
if len(files)==0: print(f"\n ERROR: could not load files from {err_path}")
return sorted(files)
def get_livneh_T(icar_1file):
"""get livneh data, cropped to the icar grid (defined by icar_1file)"""
t0 = time.time()
print("\n- - - - - - - - - - opening livneh files - - - - - - - - - - - - ")
### Livneh Temperature [degC]
tfiles = glob.glob("/glade/campaign/ral/hap/common/Livneh_met_updated/temp_and_wind/livneh_lusu_2020_temp_and_wind.2021-05-02.19[5-9]*.nc")
tfiles.extend(glob.glob("/glade/campaign/ral/hap/common/Livneh_met_updated/temp_and_wind/livneh_lusu_2020_temp_and_wind.2021-05-02.20*.nc"))
tfiles.sort()
print(' ',len(tfiles), " Livneh temperature files(years)" ) # the number of files (years)
if test:
livneht = xr.open_mfdataset(tfiles[:10]) # speed up for testing
print(' opening first 10 files for faster testing')
else:
livneht = xr.open_mfdataset(tfiles,parallel=True)
# # # crop livneh to ICAR bounds:
max_lat=icar_1file.lat.max().values#+buff
min_lat=icar_1file.lat.min().values#-buff
max_lon=icar_1file.lon.max().values#+buff
min_lon=icar_1file.lon.min().values#-buff
LatIndexer, LonIndexer = 'lat', 'lon'
livneht = livneht.sel(**{LatIndexer: slice(min_lat, max_lat ),
LonIndexer: slice(min_lon, max_lon)})
livneh_tmin = livneht["Tmin"] # + 273.15 #.load()
livneh_tmax = livneht["Tmax"] # + 273.15 #.load()
# # clean up some data; correct time dimension:
if 'Time' in livneh_tmin.dims:
livneh_tmin = livneh_tmin.rename( {'Time':'time'})
if not ('time' in livneh_tmin.dims):
print(' warning ' )
if 'Time' in livneh_tmax.dims:
livneh_tmax = livneh_tmax.rename( {'Time':'time'})
if not ('time' in livneh_tmax.dims):
print(' warning ' )
print(" ", time.time() - t0)
return livneh_tmin, livneh_tmax
def relative_humidity(t,qv,p):
# ! convert specific humidity to mixing ratio
mr = qv / (1-qv)
# ! convert mixing ratio to vapor pressure
e = mr * p / (0.62197+mr)
# ! convert temperature to saturated vapor pressure
es = 611.2 * np.exp(17.67 * (t - 273.15) / (t - 29.65))
# ! finally return relative humidity
relative_humidity = e / es
# ! because it is an approximation things could go awry and rh outside or reasonable bounds could break something else.
# ! alternatively air could be supersaturated (esp. on boundary cells) but cloud fraction calculations will break.
# relative_humidity = min(1.0, max(0.0, relative_humidity))
return relative_humidity
def get_dsRef_full(ref_start, ref_end):
"""returns the full reference dataset, cropped to the ICAR grid"""
# get file list for full ref period: try to load daily, if not: load 3hr, make daily.
print(f"* * * loading Reference data... * * *")
files_ref = get_icar_ref_filelist(ref_start.split('-')[0], ref_end.split('-')[0], dt="daily")
if len(files_ref) >= ( int(ref_end.split('-')[0]) -int(ref_start.split('-')[0])):
print(f" found {len(files_ref)} daily ref files")
else:
files_ref = get_icar_ref_filelist(ref_start.split('-')[0], ref_end.split('-')[0], dt="3hr")
print(f" found {len(files_ref)} 3hr ref files")
print(f" loading {len(files_ref)} icar files: {files_ref[0].split('/')[-1]} to {files_ref[-1].split('/')[-1]} ")
try:
dsRef_full = xr.open_mfdataset( files_ref)
except:
dsRef_full = xr.open_mfdataset( files_ref, combine='nested', compat='override')
print(f"\n !!! Warning: issues combining ref data were circumvented by using compat='override' !!! \n")
print(f" Ref full time: {dsRef_full.time.values.min()} to {dsRef_full.time.max().values} ")
# # make TMin/Tmax from Ref:
if 'Tmin' in dsRef_full.data_vars:
dsTmin_ref = dsRef_full.Tmin.load()
dsTmax_ref = dsRef_full.Tmax.load()
else:
print(" making daily tmin/tmax values from 3hr ta2m Reference data")
dsTmin_ref = dsRef_full.ta2m.resample(time='1D').min(dim='time').load()
dsTmax_ref = dsRef_full.ta2m.resample(time='1D').max(dim='time').load()
return dsTmin_ref, dsTmax_ref
def get_dsRef_ex5y(dsRef_full, fiveY_s, fiveY_e, ref_start, ref_end ):
""" takes strings of form %Y-%M-%D, returns dsRef_full without the 5year period defined by fiveY_s to fiveY_e"""
#### Subset reference period: ######
# A. If we have overlap at the start of ref period: (NB: cond fails if ref period < 5y)
if( ( datetime.strptime(fiveY_s, '%Y-%m-%d') <= datetime.strptime(ref_start, '%Y-%m-%d') ) and
( datetime.strptime(fiveY_e, '%Y-%m-%d') > datetime.strptime(ref_start, '%Y-%m-%d') )
):
print(f" ref period={fiveY_e} to {ref_end} ")
dsI_ref = dsRef_full.sel(time=slice(fiveY_e,ref_end))
# B. if 5y block starts after ref_start, and finshes before ref_end (i.e. falls within ref period):
elif( ( datetime.strptime(ref_start, '%Y-%m-%d') < datetime.strptime(fiveY_s, '%Y-%m-%d') < datetime.strptime(ref_end, '%Y-%m-%d') ) and
( datetime.strptime(fiveY_e, '%Y-%m-%d') < datetime.strptime(ref_end, '%Y-%m-%d') )
):
print(f" ref period={ref_start} to {fiveY_s} and {fiveY_e} to {ref_end} ")
ref1 = dsRef_full.sel(time=slice(ref_start,fiveY_s))
ref2 = dsRef_full.sel(time=slice(fiveY_e,ref_end))
dsI_ref = xr.concat([ref1, ref2], dim='time')
# C. if 5y block starts after ref_start, and finishes after ref_end
elif( ( datetime.strptime(ref_start, '%Y-%m-%d') < datetime.strptime(fiveY_s, '%Y-%m-%d') < datetime.strptime(ref_end, '%Y-%m-%d') ) and
( datetime.strptime(fiveY_e, '%Y-%m-%d') > datetime.strptime(ref_end, '%Y-%m-%d') )
):
print(f" ref period= {ref_start} to {fiveY_s} ")
dsI_ref = dsRef_full.sel(time=slice(ref_start,fiveY_s))
else: # D. else
print(f" ref period= {ref_start} to {ref_end} ")
dsI_ref = dsRef_full.sel(time=slice(ref_start,ref_end))
return dsI_ref
def drop_vars(ds, vars_to_drop=vars_to_drop):
""" drop unwanted variables """
vars_to_drop2=[]
for v in vars_to_drop:
if v not in ds.data_vars:
print(f"var to drop {v} not in ds.data_vars")
else:
vars_to_drop2.append(v)
ds_out = ds.drop_vars(vars_to_drop2)
return ds_out
############################################
# Main #
# ###########################################
if __name__ == '__main__':
t00 = time.time()
# process command line
args = process_command_line()
model = args.model
scen = args.scenario
part = args.part
dt = args.dt # timestep is either "daily" or "3hr"
CMIP = args.CMIP
print('\n ########################################################')
print( args.model, args.scenario, args.dt, ' part', args.part, CMIP)
print(' ########################################################', '\n')
# base_in = f"/glade/campaign/ral/hap/bert/{CMIP}/WUS_icar_livBC2" # PCP in! # This needs to be the same as path_out in BC_Icar2Liv_5y_pcp.py !!!
# set paths based on cmip:
if CMIP=="CMIP6":
# base_in = f"/glade/campaign/ral/hap/bert/{CMIP}/WUS_icar_livBC3" # PCP in! # ta2m mased correctlt
ref_in = f"/glade/derecho/scratch/bkruyt/{CMIP}/WUS_icar_LivGrd3" # referenece files
path_out = f"/glade/campaign/ral/hap/bert/{CMIP}/WUS_icar_livBC3"
# path_out = f"/glade/campaign/ral/hap/bert/{CMIP}/WUS_icar_livBC4" # test for ta2m anomaly
base_in = path_out
elif CMIP=="CMIP5":
base_in = f"/glade/campaign/ral/hap/bert/{CMIP}/WUS_icar_livBC4" # BC PCP as input
ref_in = f"/glade/derecho/scratch/bkruyt/{CMIP}/WUS_icar_LivGrd4"
path_out = f"/glade/campaign/ral/hap/bert/{CMIP}/WUS_icar_livBC4" # 4: Correct GCM cp removed in postproccessing.
elif CMIP=="CESM2":
ref_in = f"/glade/campaign/ral/hap/bert/CESM2/livneh/regrid_input" # referenece files (for T only)
path_out = f"/glade/campaign/ral/hap/bert/CESM2/livneh/bias_corrected"
base_in = path_out # input files already have pcp corrected
# create out dir if it does not exist
if dropvars and not os.path.exists(f"{path_out}/{model}_{scen}/{dt}"):
os.makedirs(f"{path_out}/{model}_{scen}/{dt}")
print("Created directory " + f"{path_out}/{model}_{scen}/{dt}")
elif dropvars and os.path.exists(f"{path_out}/{model}_{scen}/{dt}"):
print(f" existing files in {path_out}/{model}_{scen}/{dt} will be overwritten!")
elif not os.path.exists(f"{path_out}/{model}_{scen}/{dt}_ta2m"):
os.makedirs(f"{path_out}/{model}_{scen}/{dt}_ta2m")
print("Created directory " + f"{path_out}/{model}_{scen}/{dt}_ta2m")
##################### define periods: (refactor) ######################
if CMIP=="CMIP6": # historical until 2014-12-31
if scen[:4]=="hist":
time_s=['1950-01-01','1955-01-01','1960-01-01','1965-01-01','1970-01-01','1975-01-01','1980-01-01','1985-01-01','1990-01-01','1995-01-01','2000-01-01','2005-01-01','2010-01-01']
time_f=['1954-12-31','1959-12-31','1964-12-31','1969-12-31','1974-12-31','1979-12-31','1984-12-31','1989-12-31','1994-12-31','1999-12-31','2004-12-31','2009-12-31', '2014-12-31']
else:
time_s=['2015-01-01','2020-01-01','2025-01-01','2030-01-01','2035-01-01','2040-01-01','2045-01-01','2050-01-01','2055-01-01','2060-01-01','2065-01-01','2070-01-01','2075-01-01','2080-01-01','2085-01-01','2090-01-01','2095-01-01']
time_f=['2019-12-31','2024-12-31','2029-12-31','2034-12-31','2039-12-31','2044-12-31','2049-12-31','2054-12-31','2059-12-31','2064-12-31','2069-12-31','2074-12-31','2079-12-31','2084-12-31','2089-12-31','2094-12-31','2099-12-30']
elif CMIP=="CMIP5": # historical until 2004-12-31
if scen[:4]=="hist":
time_s=['1950-01-01','1955-01-01','1960-01-01','1965-01-01','1970-01-01','1975-01-01','1980-01-01','1985-01-01','1990-01-01','1995-01-01','2000-01-01']
time_f=['1954-12-31','1959-12-31','1964-12-31','1969-12-31','1974-12-31','1979-12-31','1984-12-31','1989-12-31','1994-12-31','1999-12-31','2004-12-31']
else:
time_s=['2005-01-01','2010-01-01','2015-01-01','2020-01-01','2025-01-01','2030-01-01','2035-01-01','2040-01-01','2045-01-01','2050-01-01','2055-01-01','2060-01-01','2065-01-01','2070-01-01','2075-01-01','2080-01-01','2085-01-01','2090-01-01','2095-01-01']
time_f=['2009-12-31','2014-12-31','2019-12-31','2024-12-31','2029-12-31','2034-12-31','2039-12-31','2044-12-31','2049-12-31','2054-12-31','2059-12-31','2064-12-31','2069-12-31','2074-12-31','2079-12-31','2084-12-31','2089-12-31','2094-12-31','2099-12-30']
elif CMIP=="CESM2":
time_s=[
'1900-01-01','1905-01-01','1910-01-01','1915-01-01','1920-01-01','1925-01-01','1930-01-01','1935-01-01','1940-01-01','1945-01-01','1950-01-01','1955-01-01','1960-01-01','1965-01-01','1970-01-01','1975-01-01','1980-01-01','1985-01-01','1990-01-01','1995-01-01','2000-01-01','2005-01-01','2010-01-01','2015-01-01','2020-01-01','2025-01-01','2030-01-01','2035-01-01','2040-01-01','2045-01-01','2050-01-01','2055-01-01','2060-01-01','2065-01-01','2070-01-01','2075-01-01','2080-01-01','2085-01-01','2090-01-01','2095-01-01']
time_f=[
'1904-12-31','1909-12-31','1914-12-31','1919-12-31','1924-12-31','1929-12-31','1934-12-31','1939-12-31','1944-12-31','1949-12-31','1954-12-31','1959-12-31','1964-12-31','1969-12-31','1974-12-31','1979-12-31','1984-12-31','1989-12-31','1994-12-31','1999-12-31','2004-12-31','2009-12-31','2014-12-31','2019-12-31','2024-12-31','2029-12-31','2034-12-31','2039-12-31','2044-12-31','2049-12-31','2054-12-31','2059-12-31','2064-12-31','2069-12-31','2074-12-31','2079-12-31','2084-12-31','2089-12-31','2094-12-31','2099-12-30']
if int(args.part)==1:
ts=0
elif int(args.part)==2: # if we've run sth before
# exp: get index in time_s from last completed run:
# if len(done)==0:
if dropvars:
done=sorted(glob.glob(f"{path_out}/{model}_{scen}/{dt}/icar_{dt}_livgrid_{model}_{scen.split('-')[0]}_*.nc" ))
else:
done=sorted(glob.glob(f"{path_out}/{model}_{scen}/{dt}_ta2m/icar_{dt}_livgrid_{model}_{scen.split('-')[0]}_*.nc" ))
last_run_s=done[-1].split('_')[-1].split('-')[0]
last_run_f=done[-1].split('_')[-1].split('-')[1]
print(f"last succesfull 5y block was {last_run_s} to {last_run_f} ")
res = [i for i in time_s if last_run_s in i]
if time_s.index(res[0])==len( time_s): # if we ran to completion.
print("done, no need to rerun. Stopping")
sys.exit()
elif os.path.getsize(done[-1])==os.path.getsize(done[0]):
# elif os.path.getsize(done[-1])==os.path.getsize(done[-2]): # check if File sizes of last 2 files are similar: (if last file was written to completion)
ts = time_s.index(res[0]) +1
print(f" last block ran to completion, (re)starting at {time_s[ts]}")
elif time_s.index(res[0])==2 or time_s.index(res[0])==-1: #(except when it is the 2nd or last file, then the file sizes are different. )
ts = time_s.index(res[0])+1
print(f" check last 5y block's filesize. (re)starting at {time_s[ts]}")
else: # redo the last file
ts = time_s.index(res[0])
print(f" last block did not run to completion, (re)starting at {time_s[ts]}")
elif int(args.part)==3: # custom
# time_s=['2025-01-01', '2030-01-01','2035-01-01', '2040-01-01']
# time_f=['2029-12-31', '2034-12-31','2039-12-31', '2044-12-31']
# time_s=['2085-01-01']
# time_f=['2089-12-31']
time_s=['2065-01-01']
time_f=['2069-12-31']
# time_s=['1950-01-01','1970-01-01']
# time_f=['1954-12-31', '1974-12-31' ]
# time_s=['2005-01-01', '2010-01-01', '2055-01-01']
# time_f=['2009-12-31', '2014-12-31', '2059-12-31']
# time_s=['2070-01-01']
# time_f=['2074-12-31']
ts=0
#- - - - - - -B. define (full) reference period - - - - - - -
print(" Memory use before loading anythng:")
# Getting % usage of virtual_memory ( 3rd field)
print(' * * * RAM memory % used:', psutil.virtual_memory()[2], ' * * * ')
# Getting usage of virtual_memory in GB ( 4th field)
print(' * * * RAM Used (GB):', psutil.virtual_memory()[3]/1000000000, ' * * * ')
dsTmin_ref, dsTmax_ref = get_dsRef_full(ref_start, ref_end)
# - - - - - C. Define data to match: (Livneh) - - - - -
files_ref = get_icar_ref_filelist(ref_start.split('-')[0], ref_start.split('-')[0], dt=dt) # one ICAR file to crop livneh with
icar_1_for_grid = xr.open_mfdataset( files_ref[0]) # one ICAR file to crop livneh with
livneh_tmin, livneh_tmax = get_livneh_T(icar_1file=icar_1_for_grid.isel(time=0))
#load the var to correct;
livneh_tmin = livneh_tmin.load()
livneh_tmax = livneh_tmax.load()
################# Loop through the 5y periods: ###############
for t in range(ts,len(time_s)):
t0=time.time()
print(f"\n ________ Processing period {time_s[t]} to {time_f[t]}: ________ ")
start_year = time_s[t].split('-')[0]
end_year = time_f[t].split('-')[0]
# ICAR input data to correct:
files_in = get_icar_PCP_filelist(time_s[t].split('-')[0], time_f[t].split('-')[0], dt=dt)
dsI_in = xr.open_mfdataset( files_in ) # loaded into memory in bc_func
# # subset reference data, (leave out 5years)
dsTmin_ref_ex5y= get_dsRef_ex5y(dsTmin_ref, fiveY_s=time_s[t], fiveY_e=time_f[t], ref_start=ref_start, ref_end=ref_end )
dsTmax_ref_ex5y= get_dsRef_ex5y(dsTmax_ref, fiveY_s=time_s[t], fiveY_e=time_f[t], ref_start=ref_start, ref_end=ref_end )
print(f" Ref time: {dsTmin_ref_ex5y.time.values.min()} to {dsTmin_ref_ex5y.time.max().values} ")
# print(f" min liv tmin: {np.nanmin(livneh_tmin)}")
# print(f" min Tmin ref ex5y: {np.nanmin(dsTmin_ref_ex5y.isel(lon=slice(None, -104)).values)}")
# try:
# print(f" min ta2m in: {np.nanmin(dsI_in.ta2m.isel(lon=slice(None, -104)).values)}")
# except:
# print(f" min ta2m in: {np.nanmin(dsI_in.Tmin.isel(lon=slice(None, -104)).values)}")
# -------- the bias correction functions ---------
T_corrected_ds = correct_temperature( this_ds = dsI_in #pcp_corrected_ds
,dsObs_tmin = livneh_tmin
,dsObs_tmax = livneh_tmax
,dsRef_tmin = dsTmin_ref_ex5y
,dsRef_tmax = dsTmax_ref_ex5y
,bc_by_month = bc_by_month
)
print(" Memory use after bc:")
# Getting % usage of virtual_memory ( 3rd field)
print(' * * * RAM memory % used:', psutil.virtual_memory()[2], ' * * * ')
# Getting usage of virtual_memory in GB ( 4th field)
print(' * * * RAM Used (GB):', psutil.virtual_memory()[3]/1000000000, ' * * * ')
#_______ calculate relative humidity at 2m: ________
if dt == "3hr" and 'ta2m' in T_corrected_ds.data_vars and calc_relhum:
print(" calculating relative humidity" )
# if static_P==True:
# # make a np array for the length of the input data with (static) pressure:
# P = np.repeat(dsP.psfc.values[np.newaxis, 12, :, :], len(ds.time), axis=0)
# else: # if the data has surface pressure available:
try:
T_corrected_ds.psfc
except AttributeError:
print("no psfc in source dataset!")
else:
P = T_corrected_ds.psfc.values
# calculate relative humidity:
relhum = T_corrected_ds.hus2m.copy()
relhum.values = relative_humidity(t=T_corrected_ds.ta2m.values, qv=T_corrected_ds.hus2m.values, p= P )
# replace values over 1:
relhum.values[relhum.values>1]=1.0
# replace negative values
relhum.values[relhum.values<0.0]=0.0
## if it is a dataArray we can add it like this:
T_corrected_ds = T_corrected_ds.assign(relhum2m=relhum)
T_corrected_ds.relhum2m.attrs['standard_name'] = "relative_humidity_2m"
T_corrected_ds.relhum2m.attrs['units'] = "-"
# remove vars
if dropvars==True :
print(f" dropping unwanted variables ")
T_corrected_ds = drop_vars(T_corrected_ds)
##################### # save to file ######################
if dropvars:
file_out = f"{path_out}/{model}_{scen}/{dt}/icar_{dt}_livgrid_{model}_{scen.split('-')[0]}_{time_s[t].split('-')[0]}-{time_f[t].split('-')[0]}.nc"
else:
file_out = f"{path_out}/{model}_{scen}/{dt}_ta2m/icar_{dt}_livgrid_{model}_{scen.split('-')[0]}_{time_s[t].split('-')[0]}-{time_f[t].split('-')[0]}.nc"
T_corrected_ds.to_netcdf( file_out)
print(" Memory use after saving to file:")
# Getting % usage of virtual_memory ( 3rd field)
print(' * * * RAM memory % used:', psutil.virtual_memory()[2], ' * * * ')
# Getting usage of virtual_memory in GB ( 4th field)
print(' * * * RAM Used (GB):', psutil.virtual_memory()[3]/1000000000, ' * * * ')
print(" \n ")
print(" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ")
print(f" - - - bias corrected file written to {file_out} - - - ")
print(f" - - - took {np.round(time.time()-t0,1)} sec - - - " )
print(" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ")
print(" \n ")
print(f" - - - {model} {scen} {dt} {part} took {np.round((time.time()-t00)/60,1)} min - - - " )