Merge pull request #46 from bertinia/master

test and fix timeseries "chunking" code for issue #14
NCAR · Nov 27, 2016 · ca458ac · ca458ac
2 parents ccef904 + 7dfcda1
commit ca458ac
Show file tree

Hide file tree

Showing 33 changed files with 709 additions and 293 deletions.
diff --git a/Config/config_postprocess.xml b/Config/config_postprocess.xml
@@ -128,6 +128,14 @@
 	     desc="If TRUE, create the single variable time series files using the history time slice files. All the time invariant metadata is included in each variable time series file header. Rules for how the time series variable files are created are specified in the env_archive.xml file."
 	     ></entry> 
 
+      <entry id="TIMESERIES_COMPLETECHUNK" 
+	     type="logical"
+	     valid_values="TRUE,FALSE"  
+	     value="TRUE" 
+	     group="postprocess"
+	     desc="If TRUE, create only complete chunks of variable timeseries data files as determined by the env_timeseries.xml tseries_filecat_tper and tseries_filecat_n elements. If FALSE, then incomplete chunks of variable timeseries data will be created and appended to upon subsequent running of the timeseries script. Default is TRUE."
+	     ></entry> 
+
       <entry id="GENERATE_AVGS_ATM"  
 	     type="logical"
 	     valid_values="TRUE,FALSE"  

diff --git a/Config/config_timeseries.xml b/Config/config_timeseries.xml
diff --git a/Config/config_timeseries.xsd b/Config/config_timeseries.xsd
@@ -9,10 +9,10 @@
   <!-- definition of simple elements -->
   <xs:element name="rootdir" type="xs:string"/>
   <xs:element name="multi_instance" type="xs:string"/>
+  <xs:element name="default_calendar" type="xs:string"/>
   <xs:element name="subdir" type="xs:string"/>
   <xs:element name="tseries_create" type="xs:string"/>
   <xs:element name="tseries_output_format" type="xs:string"/>
-  <xs:element name="tseries_output_subdir" type="xs:string"/>
   <xs:element name="tseries_tper" type="xs:string"/>
   <xs:element name="tseries_filecat_tper" type="xs:string"/>
   <xs:element name="tseries_filecat_n" type="xs:string"/>
@@ -25,7 +25,6 @@
 	<xs:element name="subdir" minOccurs="1" maxOccurs="1" />
 	<xs:element name="tseries_create" minOccurs="1" maxOccurs="1" />
 	<xs:element name="tseries_output_format"  minOccurs="1" maxOccurs="1" />
-	<xs:element name="tseries_output_subdir"  minOccurs="1" maxOccurs="1" />
 	<xs:element name="tseries_tper"  minOccurs="1" maxOccurs="1" />
         <xs:element name="tseries_filecat_tper"  minOccurs="1" maxOccurs="1" />
 	<xs:element name="tseries_filecat_n"  minOccurs="1" maxOccurs="1" />
@@ -55,6 +54,7 @@
       <xs:sequence>
 	<xs:element name="rootdir" minOccurs="1" maxOccurs="1" />
 	<xs:element name="multi_instance" minOccurs="1" maxOccurs="1" />
+	<xs:element name="default_calendar" minOccurs="1" maxOccurs="1" />
 	<xs:element name="files"  minOccurs="0" maxOccurs="unbounded" />
 	<xs:element name="tseries_time_variant_variables"  minOccurs="0" maxOccurs="1" />
       </xs:sequence>

diff --git a/Machines/machine_postprocess.xml b/Machines/machine_postprocess.xml
@@ -3,7 +3,7 @@
 <machine_postprocess>
 
   <machine name="yellowstone" hostname="yslogin">
-    <timeseries_pes queue="regular" pes_per_node="4" wallclock="02:00">128</timeseries_pes>
+    <timeseries_pes queue="regular" pes_per_node="15" wallclock="02:00">128</timeseries_pes>
     <mpi_command>mpirun.lsf</mpi_command>
     <pythonpath></pythonpath>
     <f2py fcompiler="gfortran" f77exec="/usr/bin/gfortran">f2py</f2py>
@@ -28,29 +28,30 @@
       <module>module load intel/12.1.5</module>
       <module>module load netcdf/4.3.0</module>
       <module>module load nco/4.4.4</module>
+      <module>module load netcdf4python/1.1.1</module>
       <module>module use /glade/apps/contrib/ncl-nightly/modules</module>
       <module>module load ncltest-intel</module>
     </modules>
     <components>
       <component name="atm">
-	<averages_pes queue="regular" pes_per_node="4" wallclock="00:30">128</averages_pes>
+	<averages_pes queue="regular" pes_per_node="15" wallclock="00:30">128</averages_pes>
 	<diagnostics_pes queue="geyser" pes_per_node="8" wallclock="02:00">16</diagnostics_pes>
 	<regrid_pes queue="geyser" pes_per_node="2" wallclock="02:00">6</regrid_pes>
 	<obs_root>/glade/p/cesm/amwg/amwg_data</obs_root>
       </component>
       <component name="ice">
-	<averages_pes queue="regular" pes_per_node="2" wallclock="00:30">128</averages_pes>
+	<averages_pes queue="regular" pes_per_node="15" wallclock="00:30">128</averages_pes>
 	<diagnostics_pes queue="geyser" pes_per_node="2" wallclock="01:00">4</diagnostics_pes>
 	<obs_root>/glade/p/cesm/pcwg/ice/data</obs_root>
       </component>
       <component name="lnd">
-	<averages_pes queue="regular" pes_per_node="2" wallclock="02:00">128</averages_pes>
+	<averages_pes queue="regular" pes_per_node="15" wallclock="02:00">128</averages_pes>
 	<diagnostics_pes queue="geyser" pes_per_node="4" wallclock="02:00">12</diagnostics_pes>
 	<regrid_pes queue="geyser" pes_per_node="2" wallclock="02:00">6</regrid_pes>
 	<obs_root>/glade/p/cesm/lmwg/diag/lnd_diag_data</obs_root>
       </component>
       <component name="ocn">
-	<averages_pes queue="regular" pes_per_node="8" wallclock="00:30">128</averages_pes>
+	<averages_pes queue="regular" pes_per_node="15" wallclock="00:30">128</averages_pes>
 	<diagnostics_pes queue="geyser" pes_per_node="4" wallclock="02:00">16</diagnostics_pes>
 	<obs_root>/glade/p/cesm</obs_root>
       </component>

diff --git a/Machines/yellowstone_modules b/Machines/yellowstone_modules
@@ -15,6 +15,7 @@ module load intel/12.1.5
 module load netcdf/4.3.0
 module load nco/4.4.4
 module load ncl/6.3.0
+module load netcdf4python/1.1.1
 
 # prepend the virtualenv into the PATH
 PATH=/glade/apps/contrib/virtualenv/12.0.7:${PATH}

diff --git a/Templates/batch_yellowstone.tmpl b/Templates/batch_yellowstone.tmpl
@@ -1,3 +1,42 @@
+##########
+##
+## General rules for determining PE counts and distribution across nodes
+## ---------------------------------------------------------------------
+##
+## Averages:
+##
+## For averages, set -n equal to the number of variables to be averaged 
+## plus the number of averages to be computed. The ptile should always
+## be set to 15 on yellowstone exclusive nodes. 
+##
+## For high-resolution ocean data sets or atm data sets with a lot of
+## variables, set the netcdf_format XML variable to netcdfLarge and change
+## the queue to either geyser (shared) or bigmem (exclusive). For geyser,
+## either set -n to 16 and ptile to 2 or more, or set -n < 16 and ptile
+## to 1, which will allow for more memory usage. The -W setting may also
+## need to be increased for large data sets.
+##
+##########
+##
+## Diagnostics:
+##
+## For diagnostics, the queue should always be set to geyser or caldera
+## with the -n not to exceed the number of plot sets to be created. 
+## The ptile can be adjusted depending on the size of the input climo
+## and average files.
+##
+##########
+##
+## Variable Time series generation:
+##
+## On the yellowstone queues, -n should be set to (number of variables)/2
+## and ptile = 15. For geyser or caldera, the maximum -n is 16 and the 
+## ptile can be adjusted based on what the memory requirements might
+## be depending on the variable size and number of history time slices
+## to be included in the final single variable output file.
+##
+##########
+
 #BSUB -n {{ pes }}
 #BSUB -R "span[ptile={{ ppn }}]"
 #BSUB -q {{ queue }}

diff --git a/averager/pp_tests/control_ocn_series.py b/averager/pp_tests/control_ocn_series.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import sys
+
+# check the system python version and require 2.7.x or greater                                                                                                              
+if sys.hexversion < 0x02070000:
+    print(70 * '*')
+    print('ERROR: {0} requires python >= 2.7.x. '.format(sys.argv[0]))
+    print('It appears that you are running python {0}'.format(
+            '.'.join(str(x) for x in sys.version_info[0:3])))
+    print(70 * '*')
+    sys.exit(1)
+
+import os
+
+#
+# check the POSTPROCESS_PATH which must be set
+#
+try:
+    os.environ["POSTPROCESS_PATH"]
+except KeyError:
+    err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH environment variable.' \
+                   ' For example on yellowstone: setenv POSTPROCESS_PATH /glade/p/cesm/postprocessing')
+    raise OSError(err_msg)
+
+cesm_pp_path = os.environ["POSTPROCESS_PATH"]
+
+#
+# activate the virtual environment that was created by create_python_env
+#
+if not os.path.isfile('{0}/cesm-env2/bin/activate_this.py'.format(cesm_pp_path)):
+    err_msg = ('create_postprocess ERROR: the virtual environment cesm-env2 does not exist.' \
+                   ' Please run $POSTPROCESS_PATH/create_python_env -machine [machine name]')
+    raise OSError(err_msg)
+
+execfile('{0}/cesm-env2/bin/activate_this.py'.format(cesm_pp_path), dict(__file__='{0}/cesm-env2/bin/activate_this.py'.format(cesm_pp_path)))
+
+from pyaverager import PyAverager, specification
+
+#### User modify ####
+
+in_dir='/glade/scratch/aliceb/BRCP85C5CN_ne120_t12_pop62.c13b17.asdphys.001/ocn/proc/tseries/monthly'
+out_dir= '/glade/scratch/aliceb/BRCP85C5CN_ne120_t12_pop62.c13b17.asdphys.001/ocn/proc/tavg.2041.2050'
+pref= 'BRCP85C5CN_ne120_t12_pop62.c13b17.asdphys.001.pop.h'
+htype= 'series'
+average = ['hor.meanConcat:2041:2050']
+wght= False
+ncfrmt = 'netcdfLarge'
+serial=False
+
+#var_list = ['TEMP','SALT','PD','UVEL','VVEL','WVEL','IAGE','TAUX','TAUY','SSH','HMXL','HBLT','SFWF','PREC_F','MELT_F','MELTH_F','SHF','SHF_QSW','SENH_F','QFLUX','SNOW_F','SALT_F','EVAP_F','ROFF_F','LWUP_F','LWDN_F']
+region_nc_var = 'REGION_MASK'
+regions={1:'Sou',2:'Pac',3:'Ind',6:'Atl',8:'Lab',9:'Gin',10:'Arc',11:'Hud',0:'Glo'}
+region_wgt_var = 'TAREA'
+var_list = ['TEMP', 'SALT']
+mean_diff_rms_obs_dir = '/glade/p/cesm/omwg/timeseries_obs_tx0.1v2_62lev/'
+region_nc_var = 'REGION_MASK'
+obs_dir = '/glade/p/cesm/omwg/timeseries_obs_tx0.1v2_62lev/'
+obs_file = 'obs.nc'
+reg_obs_file_suffix = '_hor_mean_obs.nc'
+vertical_levels = 62
+
+clobber = False
+suffix = 'nc'
+date_pattern= 'yyyymm-yyyymm'
+
+#### End user modify ####
+
+pyAveSpecifier = specification.create_specifier(in_directory=in_dir,
+			          out_directory=out_dir,
+				  prefix=pref,
+                                  suffix=suffix,
+                                  date_pattern=date_pattern,
+				  hist_type=htype,
+				  avg_list=average,
+				  weighted=wght,
+				  ncformat=ncfrmt,
+                                  varlist=var_list,
+                                  serial=serial,
+                                  clobber=clobber,
+                                  mean_diff_rms_obs_dir=mean_diff_rms_obs_dir,
+                                  region_nc_var=region_nc_var,
+                                  regions=regions,
+                                  region_wgt_var=region_wgt_var,
+                                  obs_dir=obs_dir,
+                                  obs_file=obs_file,
+                                  reg_obs_file_suffix=reg_obs_file_suffix,
+                                  vertical_levels=vertical_levels)
+PyAverager.run_pyAverager(pyAveSpecifier)
+
diff --git a/averager/pp_tests/runAvg_ocn_mpi.sh b/averager/pp_tests/runAvg_ocn_mpi.sh
@@ -0,0 +1,38 @@
+#! /usr/bin/env bash
+
+#BSUB -n 6
+#BSUB -q geyser
+#BSUB -N 
+#BSUB -W 12:00
+#BSUB -R "span[ptile=1]"
+#BSUB -P P93300606
+#BSUB -o pyAve.%J.out         # output file name in which %J is replaced by the job ID
+#BSUB -e pyAve.%J.err         # error file name in which %J is replaced by the job ID
+
+. /glade/apps/opt/lmod/lmod/init/bash
+
+module restore system
+module load python/2.7.7
+
+cd /glade/p/work/aliceb/sandboxes/dev/postprocessing/cesm-env2/bin
+pwd
+. activate
+
+module load python/2.7.7
+module load numpy/1.8.1
+module load scipy/0.15.1
+module load mpi4py/2.0.0
+module load pynio/1.4.1
+module load matplotlib/1.4.3
+module load intel/12.1.5
+module load netcdf/4.3.0
+module load nco/4.4.4
+module use /glade/apps/contrib/ncl-nightly/modules
+module load ncltest-intel
+
+export POSTPROCESS_PATH=/glade/p/work/aliceb/sandboxes/dev/postprocessing
+
+mpirun.lsf /glade/p/work/aliceb/sandboxes/dev/postprocessing/averager/pp_tests/control_ocn_series.py 
+
+deactivate
+
diff --git a/averager/pyAverager/pyaverager/PyAverager.py b/averager/pyAverager/pyaverager/PyAverager.py
@@ -256,6 +256,8 @@ def compute_averages(self,spec):
 			    region_name = spec.regions[int(region_num)]
 			    # Remove the region number as part of the average name
 			    ave_descr[0] = ave_name_split[0]
+                            # get the number of vertical levels
+                            nlev = spec.vertical_levels
 			else:
 			    region_name = 'null'
 			    region_num = -99
@@ -386,7 +388,7 @@ def compute_averages(self,spec):
 					# The mean diff rms function will send the variables once they are created 
 					var_avg_results,var_DIFF_results,var_RMS_results = climAverager.mean_diff_rms(var,region_name,region_num,spec.region_nc_var,
 					    spec.region_wgt_var,years,hist_dict,ave_t.average_types[ave_descr[0]],file_dict,obs_file,
-					    reg_obs_file,inter_comm,spec.serial,VNAME_TAG,AVE_TAG)
+					    reg_obs_file,inter_comm,spec.serial,VNAME_TAG,AVE_TAG,nlev)
 				    else:
 					if ('__metaChar' in orig_var):
 					    # Handle special meta

diff --git a/averager/pyAverager/pyaverager/climAverager.py b/averager/pyAverager/pyaverager/climAverager.py
@@ -317,7 +317,7 @@ def weighted_avg_var_missing(var,years,hist_dict,ave_info,file_dict,ave_type,fil
     return var_Ave
 
 
-def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict):
+def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,nlev):
 
     '''
     Computes the weighted hor mean rms diff for a year  
@@ -336,6 +336,8 @@ def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist
 
     @param hist_dict   A dictionary that holds file references for all years/months. 
 
+    @param nlev        Number of ocean vertical levels
+
     @param ave_info    A dictionary of the type of average that is to be done.
                        Includes:  type, months_to_average, fn, and weights
                        (weights are not used in this function/average)
@@ -344,6 +346,7 @@ def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist
                        are needed by this average calculation.
 
     @return var_Ave    The averaged results for this variable across the designated time frame.
+
     '''
 
     # Get correct data slice from the yearly average file
@@ -357,7 +360,7 @@ def weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist
     region_mask = MA.expand_dims(slev_mask, axis=0)
     weights = MA.expand_dims(slev_weights, axis=0)
     if var_val.ndim > 2:
-        for lev in range(1,60):
+        for lev in range(1,nlev):
             new_region_mask = MA.expand_dims(slev_mask, axis=0)
             region_mask = np.vstack((region_mask,new_region_mask))
             new_weights = MA.expand_dims(slev_weights, axis=0)
@@ -404,7 +407,7 @@ def diff_var(var, avg_test_slice, obs_file):
 
     return var_Avg_diff
 
-def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,avg_test_slice,obs_file):
+def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,avg_test_slice,obs_file,nlev):
 
     '''
     Computes the weighted rms for a year  
@@ -434,6 +437,8 @@ def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dic
  
     @param obs_file       Observation file that contains the values to be used in the calculation.
 
+    @param nlev           Number of ocean vertical levels
+
     @return nrms          The normalized rms results for this variable.
     '''
 
@@ -448,7 +453,7 @@ def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dic
     # Since weights and region mask are only one level, we need to expand them to all levels
     region_mask = MA.expand_dims(slev_mask, axis=0)
     weights = MA.expand_dims(slev_weights, axis=0)
-    for lev in range(1,60):
+    for lev in range(1,nlev):
         new_region_mask = MA.expand_dims(slev_mask, axis=0)
         region_mask = np.vstack((region_mask,new_region_mask))
         new_weights = MA.expand_dims(slev_weights, axis=0)
@@ -474,7 +479,7 @@ def weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dic
 
     return nrms
 
-def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,obs_file,reg_obs_file,simplecomm,serial,MPI_TAG,AVE_TAG):
+def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,file_dict,obs_file,reg_obs_file,simplecomm,serial,MPI_TAG,AVE_TAG,nlev):
 
     '''
     Computes the weighted hor mean rms diff for a year  
@@ -510,6 +515,8 @@ def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,
 
     @param MPI_TAG       Integer tag used to communicate message numbers.
 
+    @param nlev          Number of ocean vertical levels
+
     @return var_Ave      The averaged results for this variable.
 
     @return var_DIFF     The difference results for this variable.
@@ -523,7 +530,7 @@ def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,
     var_rms = var+'_RMS'
 
     ## Get the masked regional average
-    var_Avg = weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year[0],hist_dict,ave_info,file_dict)
+    var_Avg = weighted_hor_avg_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year[0],hist_dict,ave_info,file_dict,nlev)
     ## Send var_Avg results to local root to write
     if (not serial):
         #md_message_v = {'name':var,'shape':var_Avg.shape,'dtype':var_Avg.dtype,'average':var_Avg}
@@ -541,7 +548,7 @@ def mean_diff_rms(var,reg_name,reg_num,mask_var,wgt_var,year,hist_dict,ave_info,
     ## Get the RMS from the obs diff
     var_slice = rover.fetch_slice(hist_dict,year[0],0,var,file_dict)
     temp_diff = diff_var(var, var_slice, obs_file)
-    var_RMS = weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year[0],hist_dict,ave_info,file_dict,temp_diff,obs_file)
+    var_RMS = weighted_rms_var_from_yr(var,reg_name,reg_num,mask_var,wgt_var,year[0],hist_dict,ave_info,file_dict,temp_diff,obs_file,nlev)
     ## Send var_RMS results to local root to write
     if (not serial):
         #md_message = {'name':var_rms,'shape':var_RMS.shape,'dtype':var_RMS.dtype,'average':var_RMS}