Skip to content

Commit

Permalink
Merge pull request #46 from bertinia/master
Browse files Browse the repository at this point in the history
test and fix timeseries "chunking" code for issue #14
  • Loading branch information
bertinia authored Nov 27, 2016
2 parents ccef904 + 7dfcda1 commit ca458ac
Show file tree
Hide file tree
Showing 33 changed files with 709 additions and 293 deletions.
8 changes: 8 additions & 0 deletions Config/config_postprocess.xml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@
desc="If TRUE, create the single variable time series files using the history time slice files. All the time invariant metadata is included in each variable time series file header. Rules for how the time series variable files are created are specified in the env_archive.xml file."
></entry>

<entry id="TIMESERIES_COMPLETECHUNK"
type="logical"
valid_values="TRUE,FALSE"
value="TRUE"
group="postprocess"
desc="If TRUE, create only complete chunks of variable timeseries data files as determined by the env_timeseries.xml tseries_filecat_tper and tseries_filecat_n elements. If FALSE, then incomplete chunks of variable timeseries data will be created and appended to upon subsequent running of the timeseries script. Default is TRUE."
></entry>

<entry id="GENERATE_AVGS_ATM"
type="logical"
valid_values="TRUE,FALSE"
Expand Down
120 changes: 59 additions & 61 deletions Config/config_timeseries.xml

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions Config/config_timeseries.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
<!-- definition of simple elements -->
<xs:element name="rootdir" type="xs:string"/>
<xs:element name="multi_instance" type="xs:string"/>
<xs:element name="default_calendar" type="xs:string"/>
<xs:element name="subdir" type="xs:string"/>
<xs:element name="tseries_create" type="xs:string"/>
<xs:element name="tseries_output_format" type="xs:string"/>
<xs:element name="tseries_output_subdir" type="xs:string"/>
<xs:element name="tseries_tper" type="xs:string"/>
<xs:element name="tseries_filecat_tper" type="xs:string"/>
<xs:element name="tseries_filecat_n" type="xs:string"/>
Expand All @@ -25,7 +25,6 @@
<xs:element name="subdir" minOccurs="1" maxOccurs="1" />
<xs:element name="tseries_create" minOccurs="1" maxOccurs="1" />
<xs:element name="tseries_output_format" minOccurs="1" maxOccurs="1" />
<xs:element name="tseries_output_subdir" minOccurs="1" maxOccurs="1" />
<xs:element name="tseries_tper" minOccurs="1" maxOccurs="1" />
<xs:element name="tseries_filecat_tper" minOccurs="1" maxOccurs="1" />
<xs:element name="tseries_filecat_n" minOccurs="1" maxOccurs="1" />
Expand Down Expand Up @@ -55,6 +54,7 @@
<xs:sequence>
<xs:element name="rootdir" minOccurs="1" maxOccurs="1" />
<xs:element name="multi_instance" minOccurs="1" maxOccurs="1" />
<xs:element name="default_calendar" minOccurs="1" maxOccurs="1" />
<xs:element name="files" minOccurs="0" maxOccurs="unbounded" />
<xs:element name="tseries_time_variant_variables" minOccurs="0" maxOccurs="1" />
</xs:sequence>
Expand Down
11 changes: 6 additions & 5 deletions Machines/machine_postprocess.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<machine_postprocess>

<machine name="yellowstone" hostname="yslogin">
<timeseries_pes queue="regular" pes_per_node="4" wallclock="02:00">128</timeseries_pes>
<timeseries_pes queue="regular" pes_per_node="15" wallclock="02:00">128</timeseries_pes>
<mpi_command>mpirun.lsf</mpi_command>
<pythonpath></pythonpath>
<f2py fcompiler="gfortran" f77exec="/usr/bin/gfortran">f2py</f2py>
Expand All @@ -28,29 +28,30 @@
<module>module load intel/12.1.5</module>
<module>module load netcdf/4.3.0</module>
<module>module load nco/4.4.4</module>
<module>module load netcdf4python/1.1.1</module>
<module>module use /glade/apps/contrib/ncl-nightly/modules</module>
<module>module load ncltest-intel</module>
</modules>
<components>
<component name="atm">
<averages_pes queue="regular" pes_per_node="4" wallclock="00:30">128</averages_pes>
<averages_pes queue="regular" pes_per_node="15" wallclock="00:30">128</averages_pes>
<diagnostics_pes queue="geyser" pes_per_node="8" wallclock="02:00">16</diagnostics_pes>
<regrid_pes queue="geyser" pes_per_node="2" wallclock="02:00">6</regrid_pes>
<obs_root>/glade/p/cesm/amwg/amwg_data</obs_root>
</component>
<component name="ice">
<averages_pes queue="regular" pes_per_node="2" wallclock="00:30">128</averages_pes>
<averages_pes queue="regular" pes_per_node="15" wallclock="00:30">128</averages_pes>
<diagnostics_pes queue="geyser" pes_per_node="2" wallclock="01:00">4</diagnostics_pes>
<obs_root>/glade/p/cesm/pcwg/ice/data</obs_root>
</component>
<component name="lnd">
<averages_pes queue="regular" pes_per_node="2" wallclock="02:00">128</averages_pes>
<averages_pes queue="regular" pes_per_node="15" wallclock="02:00">128</averages_pes>
<diagnostics_pes queue="geyser" pes_per_node="4" wallclock="02:00">12</diagnostics_pes>
<regrid_pes queue="geyser" pes_per_node="2" wallclock="02:00">6</regrid_pes>
<obs_root>/glade/p/cesm/lmwg/diag/lnd_diag_data</obs_root>
</component>
<component name="ocn">
<averages_pes queue="regular" pes_per_node="8" wallclock="00:30">128</averages_pes>
<averages_pes queue="regular" pes_per_node="15" wallclock="00:30">128</averages_pes>
<diagnostics_pes queue="geyser" pes_per_node="4" wallclock="02:00">16</diagnostics_pes>
<obs_root>/glade/p/cesm</obs_root>
</component>
Expand Down
1 change: 1 addition & 0 deletions Machines/yellowstone_modules
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ module load intel/12.1.5
module load netcdf/4.3.0
module load nco/4.4.4
module load ncl/6.3.0
module load netcdf4python/1.1.1

# prepend the virtualenv into the PATH
PATH=/glade/apps/contrib/virtualenv/12.0.7:${PATH}
Expand Down
39 changes: 39 additions & 0 deletions Templates/batch_yellowstone.tmpl
Original file line number Diff line number Diff line change
@@ -1,3 +1,42 @@
##########
##
## General rules for determining PE counts and distribution across nodes
## ---------------------------------------------------------------------
##
## Averages:
##
## For averages, set -n equal to the number of variables to be averaged
## plus the number of averages to be computed. The ptile should always
## be set to 15 on yellowstone exclusive nodes.
##
## For ocean hi-resolution or atm data sets with a lot of variables,
## set the netcdf_format XML variable to netcdfLarge, change the queue to
## either geyser (shared) or bigmem (exclusive). For geyser, set -n to 16
## and ptile to 2 or more. Or, set -n < 16 and ptile to 1 which will
## allow for more memory usage. The -W setting may also need to be
## increased for large data sets.
##
##########
##
## Diagnostics:
##
## For diagnostics, the queue should always be set to geyser or caldera
## with the -n not to exceed the number of plot sets to be created.
## The ptile can be adjusted depending on the size of the input climo
## and average files.
##
##########
##
## Variable Time series generation:
##
## On the yellowstone queues, -n should be set to (number of variables)/2
## and ptile = 15. For geyser or caldera, the maximum -n is 16 and the
## ptile can be adjusted based on what the memory requirements might
## be depending on the variable size and number of history time slices
## to be included in the final single variable output file.
##
##########

#BSUB -n {{ pes }}
#BSUB -R "span[ptile={{ ppn }}]"
#BSUB -q {{ queue }}
Expand Down
91 changes: 91 additions & 0 deletions averager/pp_tests/control_ocn_series.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python

from __future__ import print_function
import sys

# check the system python version and require 2.7.x or greater
if sys.hexversion < 0x02070000:
print(70 * '*')
print('ERROR: {0} requires python >= 2.7.x. '.format(sys.argv[0]))
print('It appears that you are running python {0}'.format(
'.'.join(str(x) for x in sys.version_info[0:3])))
print(70 * '*')
sys.exit(1)

import os

#
# check the POSTPROCESS_PATH which must be set
#
try:
os.environ["POSTPROCESS_PATH"]
except KeyError:
err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH environment variable.' \
' For example on yellowstone: setenv POSTPROCESS_PATH /glade/p/cesm/postprocessing')
raise OSError(err_msg)

cesm_pp_path = os.environ["POSTPROCESS_PATH"]

#
# activate the virtual environment that was created by create_python_env
#
if not os.path.isfile('{0}/cesm-env2/bin/activate_this.py'.format(cesm_pp_path)):
err_msg = ('create_postprocess ERROR: the virtual environment cesm-env2 does not exist.' \
' Please run $POSTPROCESS_PATH/create_python_env -machine [machine name]')
raise OSError(err_msg)

execfile('{0}/cesm-env2/bin/activate_this.py'.format(cesm_pp_path), dict(__file__='{0}/cesm-env2/bin/activate_this.py'.format(cesm_pp_path)))

from pyaverager import PyAverager, specification

#### User modify ####

in_dir='/glade/scratch/aliceb/BRCP85C5CN_ne120_t12_pop62.c13b17.asdphys.001/ocn/proc/tseries/monthly'
out_dir= '/glade/scratch/aliceb/BRCP85C5CN_ne120_t12_pop62.c13b17.asdphys.001/ocn/proc/tavg.2041.2050'
pref= 'BRCP85C5CN_ne120_t12_pop62.c13b17.asdphys.001.pop.h'
htype= 'series'
average = ['hor.meanConcat:2041:2050']
wght= False
ncfrmt = 'netcdfLarge'
serial=False

#var_list = ['TEMP','SALT','PD','UVEL','VVEL','WVEL','IAGE','TAUX','TAUY','SSH','HMXL','HBLT','SFWF','PREC_F','MELT_F','MELTH_F','SHF','SHF_QSW','SENH_F','QFLUX','SNOW_F','SALT_F','EVAP_F','ROFF_F','LWUP_F','LWDN_F']
region_nc_var = 'REGION_MASK'
regions={1:'Sou',2:'Pac',3:'Ind',6:'Atl',8:'Lab',9:'Gin',10:'Arc',11:'Hud',0:'Glo'}
region_wgt_var = 'TAREA'
var_list = ['TEMP', 'SALT']
mean_diff_rms_obs_dir = '/glade/p/cesm/omwg/timeseries_obs_tx0.1v2_62lev/'
region_nc_var = 'REGION_MASK'
obs_dir = '/glade/p/cesm/omwg/timeseries_obs_tx0.1v2_62lev/'
obs_file = 'obs.nc'
reg_obs_file_suffix = '_hor_mean_obs.nc'
vertical_levels = 62

clobber = False
suffix = 'nc'
date_pattern= 'yyyymm-yyyymm'

#### End user modify ####

pyAveSpecifier = specification.create_specifier(in_directory=in_dir,
out_directory=out_dir,
prefix=pref,
suffix=suffix,
date_pattern=date_pattern,
hist_type=htype,
avg_list=average,
weighted=wght,
ncformat=ncfrmt,
varlist=var_list,
serial=serial,
clobber=clobber,
mean_diff_rms_obs_dir=mean_diff_rms_obs_dir,
region_nc_var=region_nc_var,
regions=regions,
region_wgt_var=region_wgt_var,
obs_dir=obs_dir,
obs_file=obs_file,
reg_obs_file_suffix=reg_obs_file_suffix,
vertical_levels=vertical_levels)
PyAverager.run_pyAverager(pyAveSpecifier)

38 changes: 38 additions & 0 deletions averager/pp_tests/runAvg_ocn_mpi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#! /usr/bin/env bash

# Batch job that runs the control_ocn_series.py pyAverager driver under MPI.
# The #BSUB lines below are scheduler directives: -n and ptile set the PE
# count and PEs per node, -q the queue, -W the wallclock limit.
# NOTE(review): -N and -P are presumably job-report and project/account
# options - confirm against the site's LSF documentation.
# The paths below are hard-coded to one user's sandbox; edit them before
# reusing this script elsewhere.

#BSUB -n 6
#BSUB -q geyser
#BSUB -N
#BSUB -W 12:00
#BSUB -R "span[ptile=1]"
#BSUB -P P93300606
#BSUB -o pyAve.%J.out # output file name in which %J is replaced by the job ID
#BSUB -e pyAve.%J.err # error file name in which %J is replaced by the job ID

# Initialise the lmod "module" command for this bash shell.
. /glade/apps/opt/lmod/lmod/init/bash

# Start from the system module set, then load python before activating the
# virtual environment.
module restore system
module load python/2.7.7

# Activate the cesm-env2 virtual environment from inside its bin directory.
cd /glade/p/work/aliceb/sandboxes/dev/postprocessing/cesm-env2/bin
pwd
. activate

# Load the modules used by the run. python/2.7.7 is loaded a second time here.
# NOTE(review): presumably to re-assert it after activation - confirm whether
# this repeat is required.
module load python/2.7.7
module load numpy/1.8.1
module load scipy/0.15.1
module load mpi4py/2.0.0
module load pynio/1.4.1
module load matplotlib/1.4.3
module load intel/12.1.5
module load netcdf/4.3.0
module load nco/4.4.4
module use /glade/apps/contrib/ncl-nightly/modules
module load ncltest-intel

# Export POSTPROCESS_PATH for the driver script launched below.
export POSTPROCESS_PATH=/glade/p/work/aliceb/sandboxes/dev/postprocessing

# Launch the driver script through the LSF MPI launcher.
mpirun.lsf /glade/p/work/aliceb/sandboxes/dev/postprocessing/averager/pp_tests/control_ocn_series.py

# Leave the virtual environment that ". activate" entered.
deactivate

4 changes: 3 additions & 1 deletion averager/pyAverager/pyaverager/PyAverager.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ def compute_averages(self,spec):
region_name = spec.regions[int(region_num)]
# Remove the region number as part of the average name
ave_descr[0] = ave_name_split[0]
# get the number of vertical levels
nlev = spec.vertical_levels
else:
region_name = 'null'
region_num = -99
Expand Down Expand Up @@ -386,7 +388,7 @@ def compute_averages(self,spec):
# The mean diff rms function will send the variables once they are created
var_avg_results,var_DIFF_results,var_RMS_results = climAverager.mean_diff_rms(var,region_name,region_num,spec.region_nc_var,
spec.region_wgt_var,years,hist_dict,ave_t.average_types[ave_descr[0]],file_dict,obs_file,
reg_obs_file,inter_comm,spec.serial,VNAME_TAG,AVE_TAG)
reg_obs_file,inter_comm,spec.serial,VNAME_TAG,AVE_TAG,nlev)
else:
if ('__metaChar' in orig_var):
# Handle special meta
Expand Down
21 changes: 14 additions & 7 deletions averager/pyAverager/pyaverager/climAverager.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def weighted_avg_var_missing(var,years,hist_dict,ave_info,file_dict,ave_type,fil
return var_Ave


def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict):
def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,nlev):

'''
Computes the weighted hor mean rms diff for a year
Expand All @@ -336,6 +336,8 @@ def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist
@param hist_dict A dictionary that holds file references for all years/months.
@param nlev Number of ocean vertical levels
@param ave_info A dictionary of the type of average that is to be done.
Includes: type, months_to_average, fn, and weights
(weights are not used in this function/average)
Expand All @@ -344,6 +346,7 @@ def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist
are needed by this average calculation.
@return var_Ave The averaged results for this variable across the designated time frame.
'''

# Get correct data slice from the yearly average file
Expand All @@ -357,7 +360,7 @@ def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist
region_mask = MA.expand_dims(slev_mask, axis=0)
weights = MA.expand_dims(slev_weights, axis=0)
if var_val.ndim > 2:
for lev in range(1,60):
for lev in range(1,nlev):
new_region_mask = MA.expand_dims(slev_mask, axis=0)
region_mask = np.vstack((region_mask,new_region_mask))
new_weights = MA.expand_dims(slev_weights, axis=0)
Expand Down Expand Up @@ -404,7 +407,7 @@ def diff_var(var, avg_test_slice, obs_file):

return var_Avg_diff

def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,avg_test_slice,obs_file):
def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,avg_test_slice,obs_file,nlev):

'''
Computes the weighted rms for a year
Expand Down Expand Up @@ -434,6 +437,8 @@ def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dic
@param obs_file Observation file that contains the values to be used in the calculation.
@param nlev Number of ocean vertical levels
@return nrms The normalized rms results for this variable.
'''

Expand All @@ -448,7 +453,7 @@ def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dic
# Since weights and region mask are only one level, we need to expand them to all levels
region_mask = MA.expand_dims(slev_mask, axis=0)
weights = MA.expand_dims(slev_weights, axis=0)
for lev in range(1,60):
for lev in range(1,nlev):
new_region_mask = MA.expand_dims(slev_mask, axis=0)
region_mask = np.vstack((region_mask,new_region_mask))
new_weights = MA.expand_dims(slev_weights, axis=0)
Expand All @@ -474,7 +479,7 @@ def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dic

return nrms

def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,obs_file,reg_obs_file,simplecomm,serial,MPI_TAG,AVE_TAG):
def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,obs_file,reg_obs_file,simplecomm,serial,MPI_TAG,AVE_TAG,nlev):

'''
Computes the weighted hor mean rms diff for a year
Expand Down Expand Up @@ -510,6 +515,8 @@ def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,
@MPI_TAG Integer tag used to communicate message numbers.
@param nlev Number of ocean vertical levels
@return var_Ave The averaged results for this variable.
@return var_DIFF The difference results for this variable.
Expand All @@ -523,7 +530,7 @@ def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,
var_rms = var+'_RMS'

## Get the masked regional average
var_Avg = weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year[0],hist_dict,ave_info,file_dict)
var_Avg = weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year[0],hist_dict,ave_info,file_dict,nlev)
## Send var_Avg results to local root to write
if (not serial):
#md_message_v = {'name':var,'shape':var_Avg.shape,'dtype':var_Avg.dtype,'average':var_Avg}
Expand All @@ -541,7 +548,7 @@ def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,
## Get the RMS from the obs diff
var_slice = rover.fetch_slice(hist_dict,year[0],0,var,file_dict)
temp_diff = diff_var(var, var_slice, obs_file)
var_RMS = weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year[0],hist_dict,ave_info,file_dict,temp_diff,obs_file)
var_RMS = weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year[0],hist_dict,ave_info,file_dict,temp_diff,obs_file,nlev)
## Send var_RMS results to local root to write
if (not serial):
#md_message = {'name':var_rms,'shape':var_RMS.shape,'dtype':var_RMS.dtype,'average':var_RMS}
Expand Down
Loading

0 comments on commit ca458ac

Please sign in to comment.