diff --git a/.gitignore b/.gitignore index 42b70ac71..b16af3b89 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ # User defined config files -/config.analysis.* -/config.analysis_* +/config.* +!/config.default # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/README.md b/README.md index 47f154f5f..ae1ae42e1 100644 --- a/README.md +++ b/README.md @@ -12,19 +12,44 @@ This analysis repository presumes that the following python packages are available: * numpy * scipy * matplotlib - * numexpr - * ipython-notebook * netCDF4 - * progressbar - * vtk - * pyevtk with `conda install -c https://conda.anaconda.org/opengeostat pyevtk` - * cartopy with `conda install -c scitools cartopy` - * xarray + * xarray ≥ 0.9.1 * dask * bottleneck + * basemap + * lxml + * nco You can easily install them via the conda command: ``` -conda install -c scitools -c https://conda.anaconda.org/opengeostat numpy scipy matplotlib ipython notebook netCDF4 progressbar vtk cartopy xarray dask bottleneck pyevtk numexpr +conda config --add channels conda-forge +conda install numpy scipy matplotlib netCDF4 xarray dask bottleneck basemap lxml nco ``` + +## Running the analysis + 1. Create an empty config file (say `config.myrun`) or copy one of the + example files in the `configs` directory. + 2. Copy and modify any config options you want to change from + `config.default` into your new config file. + + **Requirements for custom config files:** + * At minimum you should set `baseDirectory` under `[output]` to the folder + where output is stored. **NOTE** this value should be a unique + directory for each run being analyzed. If multiple runs are analyzed in + the same directory, cached results from a previous analysis will not be + updated correctly. + * Any options you copy into the config file **must** include the + appropriate section header (e.g. '[runs]' or '[output]'). + * The entire `config.default` does not need to be used. This file will + automatically be used for any options you do not include in your custom + config file. + * Given the automatic sourcing of `config.default` you should **not** + alter `config.default` directly. + 3. Run: `./run_analysis.py config.myrun`. This will read the configuration + first from `config.default` and then replace that configuration with any + changes from `config.myrun`. + 4. If you want to run a subset of the analysis, you can either set the + `generate` option under `[output]` in your config file or use the + `--generate` flag on the command line. See the comments in + `config.default` for more details on this option.
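The workflow added to the README above can be illustrated end to end. The snippet below is a minimal sketch and is not part of the patch: the run name and all paths are hypothetical placeholders, while the option names (`mainRunName`, `baseDirectory`, `generate`) and the `--generate` syntax are taken from `config.default` and `run_analysis.py` introduced in this change.

```
# Sketch of the steps above (hypothetical run name and paths).
# 1. Create a small config file containing only the options to override;
#    each option must appear under its section header from config.default.
cat > config.myrun <<'EOF'
[runs]
# identifies the simulation being analyzed
mainRunName = myRun

[input]
# directory containing model results
baseDirectory = /dir/to/model/output

[output]
# must be unique for each run analyzed so cached results stay separate
baseDirectory = /dir/to/analysis/output

# optionally run only a subset of the analysis
generate = ['timeSeriesSST', 'regriddedSST']
EOF

# 2. Run the analysis; any option not set above is taken from config.default.
./run_analysis.py config.myrun

# 3. The generate list can also be overridden on the command line.
./run_analysis.py config.myrun --generate all,no_ocean,all_timeSeries
```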
diff --git a/ci/requirements-py27.yml b/ci/requirements-py27.yml index ad1e63edb..3d5279cf1 100644 --- a/ci/requirements-py27.yml +++ b/ci/requirements-py27.yml @@ -1,4 +1,6 @@ name: test_env +channels: + - conda-forge dependencies: - python=2.7 - pytest @@ -8,3 +10,7 @@ dependencies: - xarray - matplotlib - dask + - netcdf4 + - hdf5 + - hdf4 + - nco diff --git a/config.analysis b/config.analysis deleted file mode 100644 index f664ff72a..000000000 --- a/config.analysis +++ /dev/null @@ -1,264 +0,0 @@ -[case] -# ACME case configuration -casename = 20160805v0atm.A_WCYCL1850_v0atm.ne30_oEC.edison.alpha7_00 -native_res = ne30 -short_term_archive = 0 -ref_casename_v0 = B1850C5_ne30_v0.4 - -[input] -# names of namelist and streams files -ocean_namelist_filename = mpas-o_in -ocean_streams_filename = streams.ocean -seaice_namelist_filename = mpas-cice_in -seaice_streams_filename = streams.cice - -[paths] -# paths to simulation and observational datasets -archive_dir = /scratch1/scratchdirs/petercal/ACME_simulations -archive_dir_ocn = /scratch1/scratchdirs/petercal/ACME_simulations/20160805v0atm.A_WCYCL1850_v0atm.ne30_oEC.edison.alpha7_00/run -scratch_dir = /global/project/projectdirs/acme/xylar/20160805v0atm.A_WCYCL1850_v0atm.ne30_oEC.edison.alpha7_00.test.pp -plots_dir = /global/project/projectdirs/acme/xylar/coupled_diagnostics_20160805v0atm.A_WCYCL1850_v0atm.ne30_oEC.edison.alpha7_00-20160520.A_WCYCL1850.ne30_oEC.edison.alpha6_01 -log_dir = /global/project/projectdirs/acme/xylar/coupled_diagnostics_20160805v0atm.A_WCYCL1850_v0atm.ne30_oEC.edison.alpha7_00-20160520.A_WCYCL1850.ne30_oEC.edison.alpha6_01.logs -obs_ocndir = /global/project/projectdirs/acme/observations/Ocean -obs_sstdir = /global/project/projectdirs/acme/observations/Ocean/SST -obs_sssdir = /global/project/projectdirs/acme/observations/Ocean/SSS -obs_mlddir = /global/project/projectdirs/acme/observations/Ocean/MLD -obs_seaicedir = /global/project/projectdirs/acme/observations/SeaIce -ref_archive_v0_ocndir = /global/project/projectdirs/acme/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing -ref_archive_v0_seaicedir = /global/project/projectdirs/acme/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing - -[data] -# paths to mesh and mapping files -mpas_meshfile = /global/project/projectdirs/acme/milena/MPAS-grids/ocn/gridfile.oEC60to30.nc -mpas_remapfile = /global/project/projectdirs/acme/mapping/maps/map_oEC60to30_TO_0.5x0.5degree_blin.160412.nc -pop_remapfile = /global/project/projectdirs/acme/mapping/maps/map_gx1v6_TO_0.5x0.5degree_blin.160413.nc -# path to climotology dataset -mpas_climodir = /global/project/projectdirs/acme/xylar/20160805v0atm.A_WCYCL1850_v0atm.ne30_oEC.edison.alpha7_00.test.pp - -[seaIceData] -# paths to sea-ice observational datasets -obs_iceareaNH = /global/project/projectdirs/acme/observations/SeaIce/IceArea_timeseries/iceAreaNH_climo.nc -obs_iceareaSH = /global/project/projectdirs/acme/observations/SeaIce/IceArea_timeseries/iceAreaSH_climo.nc -obs_icevolNH = /global/project/projectdirs/acme/observations/SeaIce/PIOMAS/PIOMASvolume_monthly_climo.nc -obs_icevolSH = none - -[time] -# the first year over which to average climotologies -climo_yr1 = 6 -# the last year over which to average climotologies -climo_yr2 = 10 -# the offset year to be added to simulation years -yr_offset = 1849 -# start and end years for timeseries analysis -timeseries_yr1 = 1 -timeseries_yr2 = 9999 - -[ohc_timeseries] -generate = 1 - -[sst_timeseries] -generate = 1 - -[nino34_timeseries] -generate = 0 - -[mht_timeseries] -generate = 0 - 
-[moc_timeseries] -generate = 0 - -[sst_modelvsobs] -generate = 1 - -[sss_modelvsobs] -generate = 1 - -[mld_modelvsobs] -generate = 1 - -[seaice_timeseries] -generate = 1 - -[seaice_modelvsobs] -generate = 1 - -[ohc_timeseries] -## compare to output from another model run? -#compare_with_model = True -# compare to observations? -compare_with_obs = False -# list of region indices to plot from the region list below -regionIndicesToPlot = [6] -# Number of points over which to compute moving average (e.g., for monthly -# output, N_movavg=12 corresponds to a 12-month moving average window) -N_movavg = 12 - -[sst_timeseries] -## compare to output from another model run? -#compare_with_model = True -# compare to observations? -compare_with_obs = True -# list of region indices to plot from the region list below -regionIndicesToPlot = [6] -# Number of points over which to compute moving average (e.g., for monthly -# output, N_movavg=12 corresponds to a 12-month moving average window) -N_movavg = 12 - -[nino34_timeseries] -## compare to output from another model run? -#compare_with_model = True -# compare to observations? -compare_with_obs = True -# Number of points over which to compute moving average (e.g., for monthly -# output, N_movavg=12 corresponds to a 12-month moving average window) -N_movavg = 12 - -[mht_timeseries] -## compare to output from another model run? -#compare_with_model = True -# compare to observations? -compare_with_obs = True -# Number of points over which to compute moving average (e.g., for monthly -# output, N_movavg=12 corresponds to a 12-month moving average window) -N_movavg = 12 - -[moc_timeseries] -## compare to output from another model run? -#compare_with_model = True -# compare to observations? -compare_with_obs = True -# Number of points over which to compute moving average (e.g., for monthly -# output, N_movavg=12 corresponds to a 12-month moving average window) -N_movavg = 12 - -[seaice_timeseries] -## compare to output from another model run? -#compare_with_model = True -# compare to observations? 
-compare_with_obs = True -# Number of points over which to compute moving average (e.g., for monthly -# output, N_movavg=12 corresponds to a 12-month moving average window) -N_movavg = 1 -# title font properties -title_font_size = 18 - -[sst_modelvsobs] -# colormap for model/observations -#cmapModelObs = viridis -cmapModelObs = RdYlBu_r -# colormap for differences -#cmapDiff = RdBu_r -cmapDiff = coolwarm - -# indices into cmapModelObs for contour color -cmapIndicesModelObs = [0, 40, 80, 110, 140, 170, 200, 230, 255] -# indices into cmapModelObs for contour color -cmapIndicesDiff = [0, 40, 80, 120, 140, 170, 210, 255] -#cmapIndicesDiff = [0, 40, 80, 127, 170, 210, 255] # good for RdBu_r - -# colormap levels/values for contour boundaries -clevsModelObs = [-2, 0, 2, 6, 10, 16, 22, 26, 28, 32] -clevsDiff = [-5, -3, -2, -1, 0, 1, 2, 3, 5] -#clevsDiff = [-3, -2, -1, -0.5, 0.5, 1, 2, 3] # good for RdBu_r - -# Times for comparison times (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec, JFM, AMJ, JAS, OND, ANN) -comparisonTimes = ['JFM', 'JAS', 'ANN'] - -[sss_modelvsobs] -# colormap for model/observations -#cmapModelObs = viridis -cmapModelObs = RdYlBu_r -# colormap for differences -#cmapDiff = RdBu_r -cmapDiff = coolwarm - -# indices into cmapModelObs for contour color -cmapIndicesModelObs = [0,40,80,110,140,170,200,230,255] -# indices into cmapModelObs for contour color -cmapIndicesDiff = [0,40,80,120,140,170,210,255] -#cmapIndicesDiff = [0,40,80,127,170,210,255] # good for RdBu_r - -# colormap levels/values for contour boundaries -clevsModelObs = [28,29,30,31,32,33,34,35,36,38] -clevsDiff = [-3,-2,-1,-0.5,0.5,1,2,3] -#clevsDiff = [-3,-2,-1,-0.5,0.5,1,2,3] # good for RdBu_r - -# Times for comparison times (Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,JFM,AMJ,JAS,OND,ANN) -comparisonTimes = ['JFM','JAS','ANN'] - -[mld_modelvsobs] -# colormap for model/observations -cmapModelObs = viridis -#cmapModelObs = RdYlBu_r -# colormap for differences -cmapDiff = RdBu_r -#cmapDiff = coolwarm - -# indices into cmapModelObs for contour color -cmapIndicesModelObs = [0, 40, 80, 110, 140, 170, 200, 230, 255] -# indices into cmapModelObs for contour color -cmapIndicesDiff = [0, 40, 80, 120, 140, 170, 210, 255] -#cmapIndicesDiff = [0, 40, 80, 127, 170, 210, 255] # good for RdBu_r - -# colormap levels/values for contour boundaries -clevsModelObs = [0, 20, 40, 60, 80, 100, 150, 200, 400, 800] -clevsDiff = [-175, -125, -75, -25, -10, 10, 25, 75, 125, 175] - -# Times for comparison times (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec, JFM, AMJ, JAS, OND, ANN) -comparisonTimes = ['JFM', 'JAS', 'ANN'] - -[seaice_modelvsobs] -# colormap for model/observations -cmapModelObs = inferno -# colormap for differences -cmapDiff = RdBu_r - -# indices into cmapModelObs for contour color -cmapIndicesModelObs = [20, 80, 110, 140, 170, 200, 230, 255] -# indices into cmapModelObs for contour color -cmapIndicesDiff = [0, 40, 80, 127, 127, 170, 210, 255] - -# colormap levels/values for contour boundaries (ice conncentration winter) -clevsModelObs_conc_win = [0.15, 0.4, 0.7, 0.9, 0.94, 0.96, 0.98, 0.99, 1] -clevsDiff_conc_win = [-0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8] - -# colormap levels/values for contour boundaries (ice conncentration summer) -clevsModelObs_conc_sum = [0.15, 0.3, 0.5, 0.7, 0.8, 0.85, 0.9, 0.95, 1] -clevsDiff_conc_sum = [-0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8] - -# colormap levels/values for contour boundaries (ice thickness NH) -clevsModelObs_thick_NH = [0, 0.25, 0.5, 1, 1.5, 2, 
2.5, 3, 3.5] -clevsDiff_thick_NH = [-2.5, -2, -0.5, -0.1, 0, 0.1, 0.5, 2, 2.5] - -# colormap levels/values for contour boundaries (ice thickness SH) -clevsModelObs_thick_SH = [0, 0.2, 0.4, 0.6, 0.8, 1, 1.5, 2, 2.5] -clevsDiff_thick_SH = [-2.5, -2, -0.5, -0.1, 0, 0.1, 0.5, 2, 2.5] - -# reference lat/lon for sea ice plots in the NH -latmin_NH = 50 -lon0_NH = 0 -# reference lat/lon for sea ice plots in the SH -latmin_SH = -50 -lon0_SH = 180 - -[regions] -# list of region names (needs to be in the same order as region indices in -# time-series stats) -regions = ['arctic', 'equatorial', 'so', 'nino3', 'nino4', 'nino3.4', 'global'] -# list of plot titles (needs to be in the same order as region indices in -# time-series stats) -plot_titles = ['Arctic', 'Equatorial (15S-15N)', 'Southern Ocean', 'Nino 3', 'Nino 4', 'Nino 3.4', 'Global Ocean'] - -[plot] -# set to true if you want plots to be displayed, rather than just written out -# Note: displayToScreen = True seems to hang on Edison on large data sets, -# so suggested use is just for debugging either locally or with small data sets -displayToScreen = False - -# font size on axes -axis_font_size = 16 -# title font properties -title_font_size = 20 -title_font_color = black -title_font_weight = normal diff --git a/config.default b/config.default new file mode 100644 index 000000000..e095ef910 --- /dev/null +++ b/config.default @@ -0,0 +1,560 @@ +## This file contains the default values of all possible configuration options +## used to run analysis tasks. Do not modify options in this file directly. +## Instead, follow this procedure: +## 1. Create an empty config file (say config.myrun) or copy one of the +## example files in the configs directory. +## 2. Copy and modify any config options you want to change from this file into +## your new config file. Make sure they have the right section name +## (e.g. [runs] or [output]). If nothing else, you will need to set +## baseDirectory under [output] to the folder where output should be stored. +## 3. Run: ./run_analysis.py config.myrun. This will read the configuration +## first from this file and then replace that configuration with any +## changes from config.myrun +## 4. If you want to run a subset of the analysis, you can either set the +## generate option under [output] in your config file or use the +## --generate flag on the command line. See the comments for 'generate' +## in the '[output]' section below for more details on this option. + + +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = runName +# referenceRunName is the name of a reference run to compare against (or None +# to turn off comparison with a reference, e.g. if no reference case is +# available) +referenceRunName = None +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = None + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /dir/to/model/output + +# Note: an absolute path can be supplied for any of these subdirectories. +# A relative path is assumed to be relative to baseDirectory.
+# By default, results are assumed to be directly in baseDirectory, +# i.e. /./ + +# subdirectory containing restart files +runSubdirectory = . +# subdirectory for ocean history files +oceanHistorySubdirectory = . +# subdirectory for sea ice history files +seaIceHistorySubdirectory = . + +# names of namelist and streams files, either a path relative to baseDirectory +# or an absolute path. +oceanNamelistFileName = mpas-o_in +oceanStreamsFileName = streams.ocean +seaIceNamelistFileName = mpas-cice_in +seaIceStreamsFileName = streams.cice + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = mesh + +# The system has a limit to how many files can be open at one time. By +# default, xarray attempts to open all files in a data set simultaneously. +# A new option allows files to be automatically closed as a data set is being +# read to prevent hitting this limit. Here, you can set what fraction of the +# system limit of open files an analysis task is allowed to use. Note: In the +# future when multiple tasks can run simultaneously, the system file limit will +# first be divided among the tasks before applying this fraction. +autocloseFileLimitFraction = 0.5 + +# Large datasets can encounter a memory error. Specification of a maximum +# chunk size `maxChunkSize` can be helpful to prevent the memory error. The +# current maximum chunk size assumes approximately 64GB of ram and large files +# with a single time slice. +maxChunkSize = 10000 + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +# NOTE: This directory path must be specific to each test case. +baseDirectory = /dir/to/analysis/output + +# subdirectories within baseDirectory for analysis output +scratchSubdirectory = scratch +plotsSubdirectory = plots +logsSubdirectory = logs +mpasClimatologySubdirectory = clim/mpas +mpasRegriddedClimSubdirectory = clim/mpas/regridded +mappingSubdirectory = mapping +timeSeriesSubdirectory = timeseries + +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'streamfunctionMOC', +# 'timeSeriesSeaIceAreaVol', 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'streamfunctionMOC', +# 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. 
For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatologies +startYear = 11 +# the last year over which to average climatologies +endYear = 20 + +# The comparison grid resolution in degrees +comparisonLatResolution = 0.5 +comparisonLonResolution = 0.5 + +# The name of the mapping file used for interpolation. If a mapping file has +# already been generated, supplying the absolute path can save the time of +# generating a new one. If nothing is supplied, the file name is automatically +# generated based on the MPAS mesh name, the comparison grid resolution, and +# the interpolation method +# mpasMappingFile = /path/to/mapping/file + +# overwrite files when building climatologies? +overwriteMapping = False +overwriteMpasClimatology = False + +# interpolation order for model and observation results. Likely values are +# 'bilinear', 'neareststod' (nearest neighbor) or 'conserve' +mpasInterpolationMethod = bilinear + +# the number of years per cached climatology file. These cached files are +# aggregated together to create annual climatologies, for example, when +# computing the MOC. +yearsPerCacheFile = 1 + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of ensuring that all values are used. +startYear = 1 +endYear = 9999 + +[index] +## options related to producing the nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of ensuring that all values are used. +# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /dir/to/ocean/observations +sstSubdirectory = SST +sssSubdirectory = SSS +mldSubdirectory = MLD +ninoSubdirectory = Nino + +# first and last year of SST observational climatology (preferably one of the +# two ranges given below) +# values for preindustrial +sstClimatologyStartYear = 1870 +sstClimatologyEndYear = 1900 +# alternative values for present day +#sstClimatologyStartYear = 1990 +#sstClimatologyEndYear = 2011 + +# The names of mapping files used for interpolating observations to the +# comparison grid. Interpolation is only performed if the observation grid has +# a different resolution from the comparison grid. If nothing is supplied, the +# file name is automatically generated based on the MPAS mesh name, the +# comparison grid resolution, and the interpolation method +# sstClimatologyMappingFile = /path/to/mapping/file +# sssClimatologyMappingFile = /path/to/mapping/file +# mldClimatologyMappingFile = /path/to/mapping/file + + +# interpolation order for observations.
Likely values are +# 'bilinear', 'neareststod' (nearest neighbor) or 'conserve' +interpolationMethod = bilinear + +# The directories where observation climatologies will be stored if they need +# to be computed. If a relative path is supplied, it is relative to the output +# base directory. If an absolute path is supplied, this should point to +# cached climatology files on the desired comparison grid, in which case +# overwriteObsClimatology should be False. If cached regridded files are +# supplied, there is no need to provide cached files before regridding. +climatologySubdirectory = clim/obs +regriddedClimSubdirectory = clim/obs/regridded + +# overwrite files when building climatologies? +overwriteObsClimatology = False + +[oceanReference] +## options related to ocean reference run with which the results will be +## compared + +# directory where ocean reference simulation results are stored +baseDirectory = /dir/to/ocean/reference + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /dir/to/ocean/reference + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /dir/to/seaice/observations +areaNH = IceArea_timeseries/iceAreaNH_climo.nc +areaSH = IceArea_timeseries/iceAreaSH_climo.nc +volNH = PIOMAS/PIOMASvolume_monthly_climo.nc +volSH = none +concentrationNASATeamNH_JFM = SSMI/NASATeam_NSIDC0051/SSMI_NASATeam_gridded_concentration_NH_jfm.interp0.5x0.5.nc +concentrationNASATeamNH_JAS = SSMI/NASATeam_NSIDC0051/SSMI_NASATeam_gridded_concentration_NH_jas.interp0.5x0.5.nc +concentrationNASATeamSH_DJF = SSMI/NASATeam_NSIDC0051/SSMI_NASATeam_gridded_concentration_SH_djf.interp0.5x0.5.nc +concentrationNASATeamSH_JJA = SSMI/NASATeam_NSIDC0051/SSMI_NASATeam_gridded_concentration_SH_jja.interp0.5x0.5.nc +concentrationBootstrapNH_JFM = SSMI/Bootstrap_NSIDC0079/SSMI_Bootstrap_gridded_concentration_NH_jfm.interp0.5x0.5.nc +concentrationBootstrapNH_JAS = SSMI/Bootstrap_NSIDC0079/SSMI_Bootstrap_gridded_concentration_NH_jas.interp0.5x0.5.nc +concentrationBootstrapSH_DJF = SSMI/Bootstrap_NSIDC0079/SSMI_Bootstrap_gridded_concentration_SH_djf.interp0.5x0.5.nc +concentrationBootstrapSH_JJA = SSMI/Bootstrap_NSIDC0079/SSMI_Bootstrap_gridded_concentration_SH_jja.interp0.5x0.5.nc +thicknessNH_ON = ICESat/ICESat_gridded_mean_thickness_NH_on.interp0.5x0.5.nc +thicknessNH_FM = ICESat/ICESat_gridded_mean_thickness_NH_fm.interp0.5x0.5.nc +thicknessSH_ON = ICESat/ICESat_gridded_mean_thickness_SH_on.interp0.5x0.5.nc +thicknessSH_FM = ICESat/ICESat_gridded_mean_thickness_SH_fm.interp0.5x0.5.nc + +# The name of mapping files used for interpolating observations to the +# comparison grid. Interpolation is only performed if the observation grid has +# a different resolution from the comparison grid. If nothing is supplied, the +# file name is automatically generated based on the MPAS mesh name, the +# comparison grid resolution, and the interpolation method +# seaIceClimatologyMappingFile = /path/to/mapping/file + +# interpolation order for observations. Likely values are +# 'bilinear', 'neareststod' (nearest neighbor) or 'conserve' +interpolationMethod = bilinear + +# The directories where observation climatologies will be stored if they need +# to be computed. 
If a relative path is supplied, it is relative to the output +# base directory. If an absolute path is supplied, this should point to +# cached climatology files on the desired comparison grid, in which case +# overwriteObsClimatology should be False. If cached regridded files are +# supplied, there is no need to provide cached files before regridding. +climatologySubdirectory = clim/obs +regriddedClimSubdirectory = clim/obs/regridded + +# overwrite files when building climatologies? +overwriteObsClimatology = False + +[seaIceReference] +## options related to sea ice reference run with which the results will be +## compared + +# directory where sea ice reference simulation results are stored +baseDirectory = /dir/to/seaice/reference + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /dir/to/seaice/reference + +[timeSeriesOHC] +## options related to plotting time series of ocean heat content (OHC) + +## compare to output from another model run? +#compareWithModel = True +# compare to observations? +compareWithObservations = False +# list of region indices to plot from the region list in [regions] below +regionIndicesToPlot = [6] +# Number of points over which to compute moving average (e.g., for monthly +# output, movingAveragePoints=12 corresponds to a 12-month moving average +# window) +movingAveragePoints = 12 + +[timeSeriesSST] +## options related to plotting time series of sea surface temperature (SST) + +## compare to output from another model run? +#compareWithModel = True +# compare to observations? +compareWithObservations = True +# list of region indices to plot from the region list in [regions] below +regionIndicesToPlot = [6] +# Number of points over which to compute moving average (e.g., for monthly +# output, movingAveragePoints=12 corresponds to a 12-month moving average +# window) +movingAveragePoints = 12 + +[indexNino34] +## options related to plotting time series of the El Nino 3.4 index + +# Specified region for the Nino Index, 5 = Nino34, 3 = Nino3, 4 = Nino4 +# The indexNino34 routine only accepts one value at a time, +# regionIndicesToPlot should be an integer +regionIndicesToPlot = 5 + +# Data source to read for comparison. There are three options +# 1 - ERS SSTv4 -- Updated version of previous -- 1854 - 2016 +# 2 - HADIsst -- Hadley center analysis -- 1870 - 2016 +observationData = HADIsst + +[timeSeriesMHT] +## options related to plotting time series of meridional heat transport (MHT) + +## compare to output from another model run? +#compareWithModel = True +# compare to observations? +compareWithObservations = True +# list of region indices to plot from the region list in [regions] below +regionIndicesToPlot = [6] +# Number of points over which to compute moving average (e.g., for monthly +# output, movingAveragePoints=12 corresponds to a 12-month moving average +# window) +movingAveragePoints = 12 + +[streamfunctionMOC] +## options related to plotting the streamfunction of the meridional overturning +## circulation (MOC) + +# Region names for basin MOC calculation. 
+# Supported options are Atlantic and IndoPacific +regionNames = ['Atlantic'] + +# Mask file for post-processing regional MOC computation +regionMaskFiles = /path/to/MOCregional/mapping/file + +# xarray (with dask) divides data sets into "chunks", allowing computations +# to be made on data that is larger than the available memory. MPAS-Analysis +# supports setting a maximum chunk size for data sets generally, and a +# separate option specific to loading the 3D velocity field in the MOC +# specifically. By default, maxChunkSize is left undefined, so that chunking +# is handled automatically. If the MOC calculation encounters memory problems, +# consider setting maxChunkSize to a number significantly lower than nEdges +# in your MPAS mesh so that the calculation will be divided into smaller +# pieces. +# maxChunkSize = 1000 + +# Size of latitude bins over which MOC streamfunction is integrated +latBinSizeGlobal = 1. +latBinSizeAtlantic = 0.5 +latBinSizeIndoPacific = 0.5 + +# colormap for model results +colormapNameGlobal = RdYlBu_r +colormapNameAtlantic = RdYlBu_r +colormapNameIndoPacific = RdYlBu_r +# colormap indices for contour color +colormapIndicesGlobal = [0, 40, 80, 110, 140, 170, 200, 230, 255] +colormapIndicesAtlantic = [0, 40, 80, 110, 140, 170, 200, 230, 255] +colormapIndicesIndoPacific = [0, 40, 80, 110, 140, 170, 200, 230, 255] +# colorbar levels/values for contour boundaries +colorbarLevelsGlobal = [-20, -10, -5, -2, 2, 5, 10, 20, 30, 40] +colorbarLevelsAtlantic = [-10, -5, -2, 0, 5, 8, 10, 14, 18, 22] +colorbarLevelsIndoPacific = [-10, -5, -2, 0, 5, 8, 10, 14, 18, 22] +# contour line levels +contourLevelsGlobal = np.arange(-25.1,35.1,10) +contourLevelsAtlantic = np.arange(-8,20.1,2) +contourLevelsIndoPacific = np.arange(-8,20.1,2) + +## compare to output from another model run? +#compareWithModel = True +# compare to observations? +compareWithObservations = True + +# Number of points over which to compute moving average for +# MOC timeseries (e.g., for monthly output, movingAveragePoints=12 +# corresponds to a 12-month moving average window) +movingAveragePoints = 12 + +[timeSeriesSeaIceAreaVol] +## options related to plotting time series of sea ice area and volume + +## compare to output from another model run? +#compareWithModel = True +# compare to observations? 
+compareWithObservations = True +# list of region indices to plot from the region list in [regions] below +regionIndicesToPlot = [6] +# Number of points over which to compute moving average (e.g., for monthly +# output, movingAveragePoints=12 corresponds to a 12-month moving average +# window) +movingAveragePoints = 1 +# title font properties +titleFontSize = 18 +# plot on polar plot +polarPlot = False + +[regriddedSST] +## options related to plotting horizontally regridded sea surface temperature +## (SST) against reference model results and observations + +# colormap for model/observations +colormapNameResult = RdYlBu_r +# color indices into colormapName for filled contours +colormapIndicesResult = [0, 40, 80, 110, 140, 170, 200, 230, 255] +# colormap levels/values for contour boundaries +colorbarLevelsResult = [-2, 0, 2, 6, 10, 16, 22, 26, 28, 32] + +# colormap for differences +colormapNameDifference = RdBu_r +# color indices into colormapName for filled contours +colormapIndicesDifference = [0, 28, 57, 85, 113, 142, 170, 198, 227, 255] +# colormap levels/values for contour boundaries +colorbarLevelsDifference = [-5, -3, -2, -1, 0, 1, 2, 3, 5] + +# Times for comparison times (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec, JFM, AMJ, JAS, OND, ANN) +comparisonTimes = ['JFM', 'JAS', 'ANN'] + +[regriddedSSS] +## options related to plotting horizontally regridded sea surface salinity +## (SSS) against reference model results and observations + +# colormap for model/observations +colormapNameResult = RdYlBu_r +# color indices into colormapName for filled contours +colormapIndicesResult = [0, 40, 80, 110, 140, 170, 200, 230, 255] +# colormap levels/values for contour boundaries +colorbarLevelsResult = [28, 29, 30, 31, 32, 33, 34, 35, 36, 38] + +# colormap for differences +colormapNameDifference = RdBu_r +# color indices into colormapName for filled contours +colormapIndicesDifference = [0, 28, 57, 85, 113, 142, 170, 198, 227, 255] +# colormap levels/values for contour boundaries +colorbarLevelsDifference = [-3, -2, -1, -0.5, 0, 0.5, 1, 2, 3] + +# Times for comparison times (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec, JFM, AMJ, JAS, OND, ANN) +comparisonTimes = ['JFM', 'JAS', 'ANN'] + +[regriddedMLD] +## options related to plotting horizontally regridded mixed layer depth +## (MLD) against reference model results and observations + +# colormap for model/observations +colormapNameResult = viridis +# color indices into colormapName for filled contours +colormapIndicesResult = [0, 40, 80, 110, 140, 170, 200, 230, 255] +# colormap levels/values for contour boundaries +colorbarLevelsResult = [0, 20, 40, 60, 80, 100, 150, 200, 400, 800] + +# colormap for differences +colormapNameDifference = RdBu_r +# color indices into colormapName for filled contours +colormapIndicesDifference = [0, 28, 57, 85, 113, 142, 170, 198, 227, 255] +# colormap levels/values for contour boundaries +colorbarLevelsDifference = [-150, -80, -30, -10, 0, 10, 30, 80, 150] + +# Times for comparison times (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec, JFM, AMJ, JAS, OND, ANN) +comparisonTimes = ['JFM', 'JAS', 'ANN'] + +[regriddedSeaIceConcThick] +## options related to plotting horizontally regridded sea ice concentration +## and thickness against reference model results and observations + +# colormap for model/observations +colormapNameConcResultWinter = inferno +colormapNameConcResultSummer = inferno +colormapNameThickResultNH = inferno +colormapNameThickResultSH = inferno +# color 
indices into colormapName for filled contours +colormapIndicesConcResultWinter = [20, 80, 110, 140, 170, 200, 230, 255] +colormapIndicesConcResultSummer = [20, 80, 110, 140, 170, 200, 230, 255] +colormapIndicesThickResultNH = [20, 80, 110, 140, 170, 200, 230, 255] +colormapIndicesThickResultSH = [20, 80, 110, 140, 170, 200, 230, 255] +# colormap levels/values for contour boundaries for: +# concentration in winter and summer +colorbarLevelsConcResultWinter = [0.15, 0.4, 0.7, 0.9, 0.94, 0.96, 0.98, 0.99, 1] +colorbarLevelsConcResultSummer = [0.15, 0.3, 0.5, 0.7, 0.8, 0.85, 0.9, 0.95, 1] +# thickness in the northern and southern hemispheres +colorbarLevelsThickResultNH = [0, 0.25, 0.5, 1, 1.5, 2, 2.5, 3, 3.5] +colorbarLevelsThickResultSH = [0, 0.2, 0.4, 0.6, 0.8, 1, 1.5, 2, 2.5] + +# colormap for differences +colormapNameConcDifferenceWinter = RdBu_r +colormapNameConcDifferenceSummer = RdBu_r +colormapNameThickDifferenceNH = RdBu_r +colormapNameThickDifferenceSH = RdBu_r +# color indices into colormapName for filled contours +colormapIndicesConcDifferenceWinter = [0, 40, 80, 127, 127, 170, 210, 255] +colormapIndicesConcDifferenceSummer = [0, 40, 80, 127, 127, 170, 210, 255] +colormapIndicesThickDifferenceNH = [0, 40, 80, 127, 127, 170, 210, 255] +colormapIndicesThickDifferenceSH = [0, 40, 80, 127, 127, 170, 210, 255] +# colormap levels/values for contour boundaries for: +# concentration in winter and summer +colorbarLevelsConcDifferenceWinter = [-0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8] +colorbarLevelsConcDifferenceSummer = [-0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8] +# thickness in the northern and southern hemispheres +colorbarLevelsThickDifferenceNH = [-2.5, -2, -0.5, -0.1, 0, 0.1, 0.5, 2, 2.5] +colorbarLevelsThickDifferenceSH = [-2.5, -2, -0.5, -0.1, 0, 0.1, 0.5, 2, 2.5] + +# reference lat/lon for sea ice plots in the northern hemisphere +minimumLatitudeNH = 50 +referenceLongitudeNH = 0 +# reference lat/lon for sea ice plots in the southern hemisphere +minimumLatitudeSH = -50 +referenceLongitudeSH = 180 + +[regions] +## options related to ocean regions used in several analysis modules + +# list of region names (needs to be in the same order as region indices in +# time-series stats) +regions = ['arctic', 'equatorial', 'so', 'nino3', 'nino4', 'nino3.4', 'global'] +# list of plot titles (needs to be in the same order as region indices in +# time-series stats) +plotTitles = ['Arctic', 'Equatorial (15S-15N)', 'Southern Ocean', 'Nino 3', 'Nino 4', 'Nino 3.4', 'Global Ocean'] + +[plot] +## options related to plotting that are the defaults across all analysis modules + +# set to true if you want plots to be displayed (one by one) to the screen in +# addition to being written out to png files +# Note: displayToScreen = True seems to hang on Edison on large data sets, +# so suggested use is just for debugging either locally or with small data sets +displayToScreen = False + +# font size on axes +axisFontSize = 16 +# title font properties +titleFontSize = 20 +titleFontColor = black +titleFontWeight = normal diff --git a/configs/README.md b/configs/README.md new file mode 100644 index 000000000..c2edd326c --- /dev/null +++ b/configs/README.md @@ -0,0 +1,9 @@ +# MPAS-Analysis + +Example config files for various HPC machines and various runs. + +The intended usage is to copy one of these examples to the root of +MPAS-Analysis (where `run_analysis.py` is located) before modifying them +(e.g. setting the output `baseDirectory`) and using them to run the +analysis. 
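As a usage illustration for the `configs` directory described above (not part of the patch), a session might look like the sketch below; the destination name `config.myrun` and the `cd` target are hypothetical, while the example config path and `run_analysis.py` are taken from this change.

```
# Hypothetical session: copy an example config to the repository root,
# adapt it, and run the analysis from there.
cd MPAS-Analysis   # root of the repository, where run_analysis.py lives
cp configs/edison/config.20161117.beta0.A_WCYCL1850.ne30_oEC.edison config.myrun
# edit config.myrun: at minimum, point [output] baseDirectory at a unique,
# writable directory for this run, then launch the analysis
./run_analysis.py config.myrun
```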
+ diff --git a/configs/edison/config.20161006bugfix.alpha8.A_WCYCL1850S.ne30_oEC_ICG.edison b/configs/edison/config.20161006bugfix.alpha8.A_WCYCL1850S.ne30_oEC_ICG.edison new file mode 100644 index 000000000..18a6b27ff --- /dev/null +++ b/configs/edison/config.20161006bugfix.alpha8.A_WCYCL1850S.ne30_oEC_ICG.edison @@ -0,0 +1,137 @@ +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = 20161006bugfix.alpha8.A_WCYCL1850S.ne30_oEC_ICG.edison +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = B1850C5_ne30_v0.4 + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /global/cscratch1/sd/jonbob/ACME_simulations/20161006bugfix.alpha8.A_WCYCL1850S.ne30_oEC_ICG.edison/run + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = EC60to30 + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +baseDirectory = /dir/to/analysis/output + +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatalogies +startYear = 6 +# the last year over which to average climatalogies +endYear = 10 + +# already been generated, supplying the absolute path can save the time of +# generating a new one. 
If nothing is supplied, the file name is automatically +# generated based on the MPAS mesh name, the comparison grid resolution, and +# the interpolation method +mpasMappingFile = /global/project/projectdirs/acme/mapping/maps/map_oEC60to30_TO_0.5x0.5degree_blin.160412.nc + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +startYear = 1 +endYear = 51 + +[index] +## options related to producing nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /global/project/projectdirs/acme/observations/Ocean/ +sstSubdirectory = SST +sssSubdirectory = SSS +mldSubdirectory = MLD + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /global/project/projectdirs/acme/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /global/project/projectdirs/acme/observations/SeaIce + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /global/project/projectdirs/acme/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing + +[timeSeriesSeaIceAreaVol] +## options related to plotting time series of sea ice area and volume + +# plot on polar plot +polarPlot = False + +[streamfunctionMOC] +## options related to plotting the streamfunction of the meridional overturning +## circulation (MOC) +maxChunkSize = 1000 + +# Mask file for ocean basin regional computation +regionMaskFiles = /global/project/projectdirs/acme/mapping/grids/EC60to30v1_SingleRegionAtlanticWTransportTransects_masks.nc diff --git a/configs/edison/config.20161117.beta0.A_WCYCL1850.ne30_oEC.edison b/configs/edison/config.20161117.beta0.A_WCYCL1850.ne30_oEC.edison new file mode 100644 index 000000000..57918dfd9 --- /dev/null +++ b/configs/edison/config.20161117.beta0.A_WCYCL1850.ne30_oEC.edison @@ -0,0 +1,138 @@ +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = 20161117.beta0.A_WCYCL1850.ne30_oEC.edison +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). 
Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = B1850C5_ne30_v0.4 + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /scratch2/scratchdirs/golaz/ACME_simulations/20161117.beta0.A_WCYCL1850.ne30_oEC.edison/run + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = EC60to30 + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +baseDirectory = /dir/to/analysis/output + +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatalogies +startYear = 41 +# the last year over which to average climatalogies +endYear = 50 + +# The names of the mapping file used for interpolation. If a mapping file has +# already been generated, supplying the absolute path can save the time of +# generating a new one. If nothing is supplied, the file name is automatically +# generated based on the MPAS mesh name, the comparison grid resolution, and +# the interpolation method +mpasMappingFile = /global/project/projectdirs/acme/mapping/maps/map_oEC60to30_TO_0.5x0.5degree_blin.160412.nc + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +startYear = 1 +endYear = 51 + +[index] +## options related to producing nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. 
+# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /global/project/projectdirs/acme/observations/Ocean/ +sstSubdirectory = SST +sssSubdirectory = SSS +mldSubdirectory = MLD + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /global/project/projectdirs/acme/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /global/project/projectdirs/acme/observations/SeaIce + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /global/project/projectdirs/acme/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing + +[timeSeriesSeaIceAreaVol] +## options related to plotting time series of sea ice area and volume + +# plot on polar plot +polarPlot = False + +[streamfunctionMOC] +## options related to plotting the streamfunction of the meridional overturning +## circulation (MOC) +maxChunkSize = 1000 + +# Mask file for ocean basin regional computation +regionMaskFiles = /global/project/projectdirs/acme/mapping/grids/EC60to30v1_SingleRegionAtlanticWTransportTransects_masks.nc diff --git a/configs/edison/config.20170313.beta1.A_WCYCL1850S.ne30_oECv3_ICG.edison b/configs/edison/config.20170313.beta1.A_WCYCL1850S.ne30_oECv3_ICG.edison new file mode 100644 index 000000000..67cae4f4d --- /dev/null +++ b/configs/edison/config.20170313.beta1.A_WCYCL1850S.ne30_oECv3_ICG.edison @@ -0,0 +1,138 @@ +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = 20170313.beta1.A_WCYCL1850S.ne30_oECv3_ICG.edison +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = B1850C5_ne30_v0.4 + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /scratch2/scratchdirs/golaz/ACME_simulations/20170313.beta1.A_WCYCL1850S.ne30_oECv3_ICG.edison/run + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = EC60to30v3 + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +baseDirectory = /dir/to/analysis/output + +# a list of analyses to generate. 
Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatalogies +startYear = 11 +# the last year over which to average climatalogies +endYear = 20 + +# The names of the mapping file used for interpolation. If a mapping file has +# already been generated, supplying the absolute path can save the time of +# generating a new one. If nothing is supplied, the file name is automatically +# generated based on the MPAS mesh name, the comparison grid resolution, and +# the interpolation method +mpasMappingFile = /global/project/projectdirs/acme/mapping/maps/map_oEC60to30v3_TO_0.5x0.5degree_blin.nc + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +startYear = 1 +endYear = 22 + +[index] +## options related to producing nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /global/project/projectdirs/acme/observations/Ocean/ +sstSubdirectory = SST +sssSubdirectory = SSS +mldSubdirectory = MLD + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. 
a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /global/project/projectdirs/acme/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /global/project/projectdirs/acme/observations/SeaIce + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /global/project/projectdirs/acme/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing + +[timeSeriesSeaIceAreaVol] +## options related to plotting time series of sea ice area and volume + +# plot on polar plot +polarPlot = False + +[streamfunctionMOC] +## options related to plotting the streamfunction of the meridional overturning +## circulation (MOC) +maxChunkSize = 1000 + +# Mask file for ocean basin regional computation +regionMaskFiles = /global/project/projectdirs/acme/mapping/grids/EC60to30v3_SingleRegionAtlanticWTransportTransects_masks.nc diff --git a/configs/lanl/config.20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.edison b/configs/lanl/config.20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.edison new file mode 100644 index 000000000..422d3d623 --- /dev/null +++ b/configs/lanl/config.20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.edison @@ -0,0 +1,134 @@ +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = 20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.edison +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = B1850C5_ne30_v0.4 + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /lustre/scratch2/turquoise/milena/ACME/20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.edison/run + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = EC60to30 + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +baseDirectory = /dir/to/analysis/output + +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. 
-- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatalogies +startYear = 2 +# the last year over which to average climatalogies +endYear = 3 + +# The names of the mapping file used for interpolation. If a mapping file has +# already been generated, supplying the absolute path can save the time of +# generating a new one. If nothing is supplied, the file name is automatically +# generated based on the MPAS mesh name, the comparison grid resolution, and +# the interpolation method +mpasMappingFile = /turquoise/usr/projects/climate/SHARED_CLIMATE/mpas_analysis/mapping/map_oEC60to30_TO_0.5x0.5degree_blin.160412.nc + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +startYear = 2 +endYear = 3 + +[index] +## options related to producing nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/observations + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/observations/SeaIce + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. 
a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing + +[timeSeriesSeaIceAreaVol] +## options related to plotting time series of sea ice area and volume + +# plot on polar plot +polarPlot = False + +[streamfunctionMOC] +## options related to plotting the streamfunction of the meridional overturning +## circulation (MOC) + +# Mask file for ocean basin regional computation +regionMaskFiles = /turquoise/usr/projects/climate/SHARED_CLIMATE/mpas_analysis/region_masks/EC60to30v1_SingleRegionAtlantic_new_transect_masks.nc diff --git a/configs/lanl/config.20170106.B-EC60to30wLI_with_double_count.wolf b/configs/lanl/config.20170106.B-EC60to30wLI_with_double_count.wolf new file mode 100644 index 000000000..a82068a71 --- /dev/null +++ b/configs/lanl/config.20170106.B-EC60to30wLI_with_double_count.wolf @@ -0,0 +1,128 @@ +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = 20170106.B-EC60to30wLI_with_double_count +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = B1850C5_ne30_v0.4 + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /lustre/scratch2/turquoise/jer/ACME/cases/B_build_double_counting/run + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = EC60to30wLI + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +baseDirectory = /dir/to/analysis/output + +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. 
For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatalogies +startYear = 5 +# the last year over which to average climatalogies +endYear = 10 + +# The names of the mapping file used for interpolation. If a mapping file has +# already been generated, supplying the absolute path can save the time of +# generating a new one. If nothing is supplied, the file name is automatically +# generated based on the MPAS mesh name, the comparison grid resolution, and +# the interpolation method +mpasMappingFile = /turquoise/usr/projects/climate/SHARED_CLIMATE/mpas_analysis/mapping/map_oEC60to30wLI_to_0.5x0.5degree_blin.170328.nc + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +startYear = 1 +endYear = 9999 + +[index] +## options related to producing nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/observations + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/observations/SeaIce + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. 
a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing + +[streamfunctionMOC] +## options related to plotting the streamfunction of the meridional overturning +## circulation (MOC) + +# Mask file for ocean basin regional computation +regionMaskFiles = /turquoise/usr/projects/climate/SHARED_CLIMATE/mpas_analysis/region_masks/EC60to30wLIv1_SingleRegionAtlantic_new_transect_masks.nc diff --git a/configs/lanl/config.20170120.beta0.GMPAS-QU240.wolf b/configs/lanl/config.20170120.beta0.GMPAS-QU240.wolf new file mode 100644 index 000000000..6274512cb --- /dev/null +++ b/configs/lanl/config.20170120.beta0.GMPAS-QU240.wolf @@ -0,0 +1,134 @@ +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = 20170120.beta0.GMPAS-QU240.wolf +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = B1850C5_ne30_v0.4 + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /lustre/scratch2/turquoise/xylar/ACME/cases/GMPAS-QU240/run + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = QU240 + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +baseDirectory = /dir/to/analysis/output + +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. 
For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatalogies +startYear = 1 +# the last year over which to average climatalogies +endYear = 3 + +# The names of the mapping file used for interpolation. If a mapping file has +# already been generated, supplying the absolute path can save the time of +# generating a new one. If nothing is supplied, the file name is automatically +# generated based on the MPAS mesh name, the comparison grid resolution, and +# the interpolation method +mpasMappingFile = /turquoise/usr/projects/climate/SHARED_CLIMATE/mpas_analysis/mapping/map_QU240_to_0.5x0.5degree_blin.170309.nc + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +startYear = 1 +endYear = 3 + +[index] +## options related to producing nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/observations + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/observations/SeaIce + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. 
a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing + +[timeSeriesSeaIceAreaVol] +## options related to plotting time series of sea ice area and volume + +# plot on polar plot +polarPlot = False + +[streamfunctionMOC] +## options related to plotting the streamfunction of the meridional overturning +## circulation (MOC) + +# Mask file for ocean basin regional computation +regionMaskFiles = /turquoise/usr/projects/climate/SHARED_CLIMATE/mpas_analysis/region_masks/QU240v1_SingleRegionAtlanticWTransportTransects_masks.nc diff --git a/configs/lanl/config.20170207.MPAS-SeaIce.QU60km_polar.wolf b/configs/lanl/config.20170207.MPAS-SeaIce.QU60km_polar.wolf new file mode 100644 index 000000000..1a70812ff --- /dev/null +++ b/configs/lanl/config.20170207.MPAS-SeaIce.QU60km_polar.wolf @@ -0,0 +1,120 @@ +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = MPAS-SeaIce.QU60km_polar +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = B1850C5_ne30_v0.4 + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /net/scratch2/akt/MPAS/rundirs/rundir_QU60km_polar + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = QU60 + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +baseDirectory = /dir/to/analysis/output + +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all_seaIce'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. 
For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatalogies +startYear = 1960 +# the last year over which to average climatalogies +endYear = 1961 + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +startYear = 1960 +endYear = 1961 + +[index] +## options related to producing nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/observations + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/observations/SeaIce + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /usr/projects/climate/SHARED_CLIMATE/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing + +[timeSeriesSeaIceAreaVol] +## options related to plotting time series of sea ice area and volume + +# plot on polar plot +polarPlot = False diff --git a/configs/olcf/config.20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.titan b/configs/olcf/config.20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.titan new file mode 100644 index 000000000..9b6557436 --- /dev/null +++ b/configs/olcf/config.20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.titan @@ -0,0 +1,137 @@ +[runs] +## options related to the run to be analyzed and reference runs to be +## compared against + +# mainRunName is a name that identifies the simulation being analyzed. +mainRunName = 20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.edison +# preprocessedReferenceRunName is the name of a reference run that has been +# preprocessed to compare against (or None to turn off comparison). 
Reference +# runs of this type would have preprocessed results because they were not +# performed with MPAS components (so they cannot be easily ingested by +# MPAS-Analysis) +preprocessedReferenceRunName = B1850C5_ne30_v0.4 + +[input] +## options related to reading in the results to be analyzed + +# directory containing model results +baseDirectory = /lustre/atlas1/cli115/proj-shared/mbranst/20161117.beta0.A_WCYCL1850S.ne30_oEC_ICG.edison/run + +# names of ocean and sea ice meshes (e.g. EC60to30, QU240, RRS30to10, etc.) +mpasMeshName = EC60to30 + +[output] +## options related to writing out plots, intermediate cached data sets, logs, +## etc. + +# directory where analysis should be written +baseDirectory = /dir/to/analysis/output + +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal +# fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] + +# alternative examples that would perform all analysis except +# 'timeSeriesOHC' +#generate = ['timeSeriesSST', 'all_regriddedHorizontal', 'all_seaIce'] +#generate = ['all', 'no_timeSeriesOHC'] +# Each subsequent list entry can be used to alter previous list entries. For +# example, the following would produce all analyses except regriddedSST, +# regriddedSSS and regriddedMLD (albeit not in a very intuitive way): +#generate = ['all', 'no_ocean', 'all_timeSeries'] + +[climatology] +## options related to producing climatologies, typically to compare against +## observations and previous runs + +# the first year over which to average climatalogies +startYear = 131 +# the last year over which to average climatalogies +endYear = 140 + +# The names of the mapping file used for interpolation. If a mapping file has +# already been generated, supplying the absolute path can save the time of +# generating a new one. If nothing is supplied, the file name is automatically +# generated based on the MPAS mesh name, the comparison grid resolution, and +# the interpolation method +# mpasMappingFile = /path/to/mapping/file + +[timeSeries] +## options related to producing time series plots, often to compare against +## observations and previous runs + +# start and end years for timeseries analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. +startYear = 131 +endYear = 140 + +[index] +## options related to producing nino index. + +# start and end years for the nino 3.4 analysis. Using out-of-bounds values +# like start_year = 1 and end_year = 9999 will be clipped to the valid range +# of years, and is a good way of insuring that all values are used. 
+# For valid statistics, index times should include at least 30 years +startYear = 1 +endYear = 9999 + +[oceanObservations] +## options related to ocean observations with which the results will be compared + +# directory where ocean observations are stored +baseDirectory = /lustre/atlas/proj-shared/cli115/observations +sstSubdirectory = SST +sssSubdirectory = SSS +mldSubdirectory = MLD + +[oceanPreprocessedReference] +## options related to preprocessed ocean reference run with which the results +## will be compared (e.g. a POP, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /lustre/atlas/proj-shared/cli115/milena/ACMEv0_lowres/B1850C5_ne30_v0.4/ocn/postprocessing + +[seaIceObservations] +## options related to sea ice observations with which the results will be +## compared + +# directory where sea ice observations are stored +baseDirectory = /lustre/atlas/proj-shared/cli115/observations/SeaIce + +[seaIcePreprocessedReference] +## options related to preprocessed sea ice reference run with which the results +## will be compared (e.g. a CICE, CESM or ACME v0 run) + +# directory where ocean reference simulation results are stored +baseDirectory = /lustre/atlas/proj-shared/cli115/milena/ACMEv0_lowres/B1850C5_ne30_v0.4/ice/postprocessing + +[timeSeriesSeaIceAreaVol] +## options related to plotting time series of sea ice area and volume + +# plot on polar plot +polarPlot = False + +[streamfunctionMOC] +## options related to plotting the streamfunction of the meridional overturning +## circulation (MOC) + +# Mask file for ocean basin regional computation +regionMaskFiles = /lustre/atlas1/cli115/proj-shared/mapping/grids/EC60to30v1_SingleRegionAtlanticWTransportTransects_masks.nc diff --git a/design_docs/config-file_reorganization.md b/design_docs/config-file_reorganization.md new file mode 100644 index 000000000..943d9b637 --- /dev/null +++ b/design_docs/config-file_reorganization.md @@ -0,0 +1,137 @@ +

Config File Reorganization
+Xylar Asay-Davis
+date: 01-29-2017
+

+

Summary

+This document describes various efforts to clean up the structure of the MPAS-Analysis config file. The idea is to create a template config file that will replace `config.analysis`, along with a number of example config files for analyzing various MPAS and ACME runs on various machines. The reorganization should make the analysis easier for users to modify and run.
+

Requirements

+ +

Requirement: a simple way of turning on and off individual analysis modules
+Date last modified: 2017/01/29
+Contributors: Xylar Asay-Davis +

+
+There should be a simple, intuitive method for turning on and off individual analysis modules (e.g. `ocean/ohc_timeseries`). This should replace the current approach of having a boolean `generate` flag for each analysis module in a separate config section. Preferably, there should be an equivalent method for turning analysis modules on and off from the command line that overrides the setting in the config file.
+

Requirement: there should be a simplified template for config files
+Date last modified: 2017/02/01
+Contributors: Xylar Asay-Davis +

+ +The current example config file should be made into a general template. Simplifications should be made to the template so that it can more easily and intuitively be modified for several analyses. Example config files should also be added for analyzing several existing runs on several different machines. + +

Requirement: removal of ACME specific config options
+Date last modified: 2017/02/01
+Contributors: Xylar Asay-Davis +

+
+To the extent possible, ACME-specific config options such as `casename` and `ref_casename_v0` should be generalized so that they are appropriate not just for ACME runs but also for any other runs involving the MPAS components we support.
+

Requirement: consistent section and option names
+Date last modified: 2017/02/01
+Contributors: Xylar Asay-Davis +

+ +A consistent convention of capitalization and underscores should be used throughout the config file. + + +

Design and Implementation

+ +

Implementation: a simple way of turning on and off individual analysis modules
+Date last modified: 2017/02/02
+Contributors: Xylar Asay-Davis +

+ +Implementation of the `config.template` file can be found [here](https://github.com/xylar/MPAS-Analysis/blob/5d5f64bde6ecf1d71f375a61783ff30f1654df01/config.template). + + +The following comment describes the planned implementation in the config file. +``` +# a list of analyses to generate. Valid names are: +# 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', +# 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', +# 'regriddedSeaIceConcThick' +# the following shortcuts exist: +# 'all' -- all analyses will be run +# 'all_timeSeries' -- all time-series analyses will be run +# 'all_regriddedHorizontal' -- all analyses involving regridded horizontal + # fields will be run +# 'all_ocean' -- all ocean analyses will be run +# 'all_seaIce' -- all sea-ice analyses will be run +# 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the +# other analyses). +# 'no_ocean', 'no_timeSeries', etc. -- in analogy to 'all_*', skip the +# given category of analysis +# an equivalent syntax can be used on the command line to override this +# option: +# ./run_analysis.py config.analysis --generate \ +# all,no_ocean,all_timeSeries +generate = ['all'] +``` +Where there are conflicts between items in the `generate` list, successive items will override earlier items. For example, `generate = ['all', 'no_timeSeriesOHC']` will generate all analyses except `timeSeriesOHC`. As another example, `generate = ['all', 'no_ocean', 'all_timeSeries']` would generate all diagnostics except those comparing ocean model results with observations (and previous model results). (Note that a more efficient and intuitive way to do the same would be `generate = ['all_seaIce', 'all_timeSeries']`.) + +An analogous approach has also been added at the command line, for example: +``` +./run_analysis.py config.analysis --generate all,no_ocean,all_timeSeries +``` +If the `--generate` flag is used on the command line, it will replace the generate option in the config file. + +As an aside, I note that it is not clear if future analysis modules will fit neatly into categories like "time series" and "regridded horizontal" fields, and these categories are not meant to be all-encompassing. + +
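To make the override behavior concrete, the following is a minimal sketch of how such a list could be expanded into a final set of analyses. It is an illustration only: the function name `expand_generate` and the hard-coded category sets are assumptions made for this example, not the actual `run_analysis.check_generate` implementation.
```python
# Illustrative sketch only; the real logic lives in run_analysis.check_generate
# and may differ in detail.
ANALYSES = {'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST',
            'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol',
            'regriddedSeaIceConcThick'}

CATEGORIES = {
    'timeSeries': {name for name in ANALYSES if name.startswith('timeSeries')},
    'regriddedHorizontal': {name for name in ANALYSES
                            if name.startswith('regridded')},
    'ocean': {'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST',
              'regriddedSSS', 'regriddedMLD'},
    'seaIce': {'timeSeriesSeaIceAreaVol', 'regriddedSeaIceConcThick'}}


def expand_generate(generate):
    """Resolve a generate list into a set of analyses; later entries
    override earlier ones."""
    selected = set()
    for entry in generate:
        if entry == 'all':
            selected |= ANALYSES
        elif entry.startswith('all_'):
            selected |= CATEGORIES[entry[len('all_'):]]
        elif entry.startswith('no_'):
            name = entry[len('no_'):]
            # remove either a whole category or a single analysis
            selected -= CATEGORIES.get(name, {name})
        else:
            selected.add(entry)
    return selected


# all analyses except the regridded (horizontal) ocean fields
print(sorted(expand_generate(['all', 'no_ocean', 'all_timeSeries'])))
```
With this resolution order, `['all', 'no_timeSeriesOHC']` yields every analysis except `timeSeriesOHC`, consistent with the examples above.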

Implementation: there should be a simplified template for config files
+Date last modified: 2017/01/29
+Contributors: Xylar Asay-Davis +

+ +The required `config.template` has been implemented in #86, specifically [here](https://github.com/xylar/MPAS-Analysis/blob/5d5f64bde6ecf1d71f375a61783ff30f1654df01/config.template). A subdirectory `configs` will be added with several examples from runs on LANL IC and on Edison at NERSC. Other examples can be added as appropriate and useful. + +

Implementation: removal of ACME specific config options
+Date last modified: 2017/02/01
+Contributors: Xylar Asay-Davis +

+ +`casename` has been renamed `mainRunName`, `referenceRunName` has been added for comparison with reference runs that have not been preprocessed (not yet supported), and `ref_casename_v0` has been renamed `preprocessedReferenceRunName`. + +See #86, specifically [config.template](https://github.com/xylar/MPAS-Analysis/blob/5d5f64bde6ecf1d71f375a61783ff30f1654df01/config.template). + +

Implementation: consistent section and option names
+Date last modified: 2017/02/01
+Contributors: Xylar Asay-Davis +

+
+In [config.template](https://github.com/xylar/MPAS-Analysis/blob/5d5f64bde6ecf1d71f375a61783ff30f1654df01/config.template) in #86, "[CamelCase](https://en.wikipedia.org/wiki/Camel_case)" has been used for all sections and options. The first word is lowercase and subsequent words begin with an uppercase letter. Underscores have been removed (except in the syntax used to turn on and off options, where underscores in the prefixes `all_` and `no_` make splitting and comparison simpler in the implementation).
+

Testing

+ +

Testing and Validation: a simple way of turning on and off individual analysis modules
+Date last modified: 2017/02/01
+Contributors: Xylar Asay-Davis +

+ +CI will be added to make sure that the function to parse the generate list (`run_analysis.check_generate`) behaves as expected. + +
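A `py.test` case along these lines could exercise the override behavior. The test below is written against the hypothetical `expand_generate` sketch shown in the implementation section above, since the exact signature of `run_analysis.check_generate` is not reproduced in this document.
```python
def test_generate_overrides():
    # 'no_*' entries remove analyses added by earlier entries
    result = expand_generate(['all', 'no_timeSeriesOHC'])
    assert 'timeSeriesOHC' not in result
    assert 'timeSeriesSST' in result

    # later entries override earlier ones
    result = expand_generate(['all', 'no_ocean', 'all_timeSeries'])
    assert result == {'timeSeriesOHC', 'timeSeriesSST',
                      'timeSeriesSeaIceAreaVol', 'regriddedSeaIceConcThick'}
```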

Testing and Validation: there should be a simplified template for config files
+Date last modified: 2017/01/29
+Contributors: Xylar Asay-Davis +

+
+There is no way to test the template in the usual sense. Instead, the test will be to ask other developers and users to adapt the template to new runs and to confirm that they find it intuitive.
+

Testing and Validation: removal of ACME specific config options
+Date last modified: 2017/01/29
+Contributors: Xylar Asay-Davis +

+ +For now, the plan is just to rename the appropriate config options, so the test is simply to ensure that analysis runs correctly and produces bit-for-bit identical images to those produced by the current `MPAS-Analysis/develop`. + + +

Testing and Validation: consistent section and option names
+Date last modified: 2017/02/01
+Contributors: Xylar Asay-Davis +

+ +As above, the test is simply to ensure that analysis runs correctly and produces bit-for-bit identical images to those produced by the current `MPAS-Analysis/develop`. + + diff --git a/design_docs/generalize_calendar.md b/design_docs/generalize_calendar.md new file mode 100644 index 000000000..ce3416ea4 --- /dev/null +++ b/design_docs/generalize_calendar.md @@ -0,0 +1,303 @@ +

Title: Generalize Calendar supported by Analysis
+Xylar Asay-Davis
+date: 2017/02/09
+

+

Summary

+Currently, the time variable in `xarray` data sets within MPAS-Analysis has two
+major shortcomings, inherited from `xarray` (through `pandas` and `numpy.datetime64`).
+First, only the Gregorian calendar is supported. Second, there is no support
+for dates outside the years 1678 to 2262. The analysis needs to support both
+the Gregorian ('gregorian') and the 365-day ('gregorian_noleap') calendars. It also needs to
+support, at a minimum, years between 0001 and 9999, and preferably arbitrary
+years, both positive and negative.
+
+A major challenge is that it seems that xarray cannot easily be forced to
+use an alternative representation of dates to the troublesome
+`numpy.datetime64` type (see, for example,
+[pydata/xarray#1084](https://github.com/pydata/xarray/issues/1084)).
+The most obvious alternative, `datetime.datetime`,
+seemingly cannot be used directly in `xarray` because objects of this type
+are converted to `numpy.datetime64` objects at various stages when using
+features from pandas, raising errors when dates are out of range. While an
+alternative date class (e.g. `netcdftime.DatetimeNoLeap`) might be used to
+represent dates on the 'gregorian_noleap' calendar, there is no such
+preexisting alternative for the 'gregorian' calendar.
+
+The solution proposed herein is to store time as floating-point days since the
+reference date 0001-01-01 and to convert dates in this format to
+`datetime.datetime` and `MpasRelativeDelta` objects whenever mathematical
+manipulation of dates is required.
+
+A successful implementation would produce essentially identical analysis to
+what is currently produced, but making use of the dates from the MPAS calendar
+(whether Gregorian or 365-day) without the need for artificial offsets (e.g. the
+`yearOffset` used in the current code). Plots of horizontal fields would remain
+unchanged, while plots of time series would have a time axis with the simulation
+date instead of the offset date.
+
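As a quick illustration of the second shortcoming (assuming only that `pandas` is installed), the nanosecond-resolution `numpy.datetime64` values used by `pandas` and `xarray` can only represent dates in roughly the range 1678 to 2262:
```python
import pandas as pd

# the representable range of nanosecond-resolution timestamps
print(pd.Timestamp.min)  # roughly 1677-09-21
print(pd.Timestamp.max)  # roughly 2262-04-11
```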

Requirements

+ +

Requirement: The 'Time' coordinate of xarray data sets must be consistent +with the MPAS calendar
+Date last modified: 2017/02/09
+Contributors: Xylar Asay-Davis +

+ +For all data sets used in the analysis, the 'Time' coordinate must represent dates +on the appropriate MPAS calendar, either 'gregorian' or 'gregorian_noleap', depending +on the namelist option 'config_calendar_type'. There must be ways of mathematically +manipulating times (e.g. adding/subtracting offsets and figuring out the amount of time +between two dates) and of making plots that are consistent with these calendars. + +

Requirement: The 'Time' coordinate of xarray data sets must support at least years +0001 and 9999, and preferably any conceivable value
+Date last modified: 2017/02/16
+Contributors: Xylar Asay-Davis +

+ +For all data sets used in the analysis, the 'Time' coordinate must, at a minimum, +support years between 0001 and 9999 (the range of `datetime.datetime`) and preferably +a broader range. + + +

Algorithmic Formulations (optional)

+ +

Design solution: The 'Time' coordinate of xarray data sets must be consistent +with the MPAS calendar
+Date last modified: 2017/02/11
+Contributors: Xylar Asay-Davis, Phillip J. Wolfram +

+
+The proposed solution represents time in `xarray.DataSet` objects as the number of
+days since the reference date 0001-01-01.
+This is reasonable because the smallest unit of time output in MPAS components is
+seconds (and unlikely to ever be shorter than ms). We note that a date specified
+as a 64-bit float has a precision high enough to represent seconds for dates up
+to +/- 100 million years:
+```python
+>>> import sys
+>>> 1./(sys.float_info.epsilon*365*24*60*60)
+142808207.36207813
+```
+We should have no trouble representing any number we might want (including paleo
+timescales) with this system.
+
+For purposes of performing mathematical operations and plotting dates, these
+values will be converted to `datetime.datetime` objects (via the proposed
+`days_to_datetime` utility function) and back (via the proposed
+`datetime_to_days`).
+
+The conversion operations within `datetime_to_days` and `days_to_datetime` will be
+performed with the calendar-aware functions `netCDF4.date2num` and
+`netCDF4.num2date`, respectively. Both functions will support lists/arrays of dates
+(for efficiency and simplicity of calling code) in addition to single values.
+
+Curve plotting can be supported with `matplotlib.pyplot.plot_date`, which takes a date
+of exactly the format used here (days since 0001-01-01). The compatibility with `plot_date`
+was part of the reason for choosing this format for the date.
+
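A minimal sketch of what these conversion helpers might look like is given below. It assumes that the MPAS calendar name 'gregorian_noleap' maps to the netCDF4 calendar name 'noleap', and it glosses over details the real utility functions would need to handle (for example, for non-standard calendars `netCDF4.num2date` returns `netcdftime` date objects rather than `datetime.datetime` objects, which the actual implementation would convert).
```python
import netCDF4

REFERENCE = 'days since 0001-01-01'
CALENDAR_MAP = {'gregorian': 'gregorian', 'gregorian_noleap': 'noleap'}


def datetime_to_days(dates, calendar='gregorian'):
    # accepts a single datetime.datetime or a list/array of them
    return netCDF4.date2num(dates, REFERENCE,
                            calendar=CALENDAR_MAP[calendar])


def days_to_datetime(days, calendar='gregorian'):
    # accepts a single float or a list/array of floats
    return netCDF4.num2date(days, REFERENCE,
                            calendar=CALENDAR_MAP[calendar])
```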

Design solution: The 'Time' coordinate of xarray data sets must support at least years +0001 and 9999, and preferably any conceivable value
+Date last modified: 2017/02/09
+Contributors: Xylar Asay-Davis +

+ +Same as above. In theory, the use of days since 0001-01-01 would allow any year +to be supported, not just the range from 0001 to 9999. However, the conversions +to `datetime.datetime` objects for mathematical manipulation will constrain +the dates to be between `datetime.min` (0001-01-01) and `datetime.max` (9999-12-31). + + +

Design and Implementation

+ +

Implementation: The 'Time' coordinate of xarray data sets must be consistent +with the MPAS calendar
+Date last modified: 2017/02/16
+Contributors: Xylar Asay-Davis +

+
+The proposed implementation is on the branch
+[xylar/generalize_calendar](https://github.com/xylar/MPAS-Analysis/tree/generalize_calendar).
+
+A helper function, `mpas_xarray._parse_dataset_time`, computes times as days since
+0001-01-01, and serves as a replacement for `mpas_xarray._get_datetimes`.
+
+**Note: the current implementation breaks the convention that `mpas_xarray` remains
+separate from the rest of MPAS-Analysis by using 3 functions from `timekeeping.utility`
+in `mpas_xarray`:**
+```python
+from ..timekeeping.utility import string_to_days_since_date, \
+    days_to_datetime, datetime_to_days
+```
+**This violates the first requirement in the
+[Design Document: Moving variable mapping out of mpas_xarray](https://github.com/xylar/MPAS-Analysis/blob/design_doc_variable_mapping_reorg/design_docs/variable_mapping_reorg.md).
+I am open to alternative solutions for keeping `mpas_xarray` separate from the rest
+of analysis, but these 3 functions do not conceptually belong in `mpas_xarray`. The
+problem is exacerbated by the fact that there are analysis-specific functions in
+`timekeeping`, meaning that this cannot easily be made a submodule of `mpas_xarray`
+(nor would this make very much logical sense). Having 2 `timekeeping` modules, one
+for `mpas_xarray` and one for MPAS-Analysis, seems unnecessarily confusing.**
+
+The functions `generalized_reader.open_multifile_dataset` and
+`mpas_xarray.open_multifile_dataset` have been updated to use this method for parsing
+times. This involves removing the `year_offset` argument and adding an optional
+`simulation_start_time` argument for supplying a date to use to convert variables
+like `daysSinceStartOfSim` to days since 0001-01-01.
+
+An example of opening a data set and manipulating times with the new approach in
+the OHC script is:
+```python
+from ..shared.timekeeping.utility import get_simulation_start_time, \
+    date_to_days, days_to_datetime, string_to_datetime
+...
+def ohc_timeseries(config, streamMap=None, variableMap=None):
+...
+    simulation_start_time = get_simulation_start_time(streams)
+...
+ ds = open_multifile_dataset(file_names=file_names, + calendar=calendar, + simulation_start_time=simulation_start_time, + time_variable_name='Time', + variable_list=variable_list, + variable_map=variableMap, + start_date=startDate, + end_date=endDate) + + timeStart = string_to_datetime(startDate) + timeEnd = string_to_datetime(endDate) + + # Select year-1 data and average it (for later computing anomalies) + timeStartFirstYear = string_to_datetime(simulation_start_time) + if timeStartFirstYear < timeStart: + startDateFirstYear = simulation_start_time + firstYear = int(startDateFirstYear[0:4]) + endDateFirstYear = '{:04d}-12-31_23:59:59'.format(firstYear) + filesFirstYear = streams.readpath(streamName, + startDate=startDateFirstYear, + endDate=endDateFirstYear, + calendar=calendar) + dsFirstYear = open_multifile_dataset( + file_names=filesFirstYear, + calendar=calendar, + simulation_start_time=simulation_start_time, + time_variable_name='Time', + variable_list=variable_list, + variable_map=variableMap, + start_date=startDateFirstYear, + end_date=endDateFirstYear) + else: + dsFirstYear = ds + firstYear = timeStart.year + + timeStartFirstYear = date_to_days(year=firstYear, month=1, day=1, + calendar=calendar) + timeEndFirstYear = date_to_days(year=firstYear, month=12, day=31, + hour=23, minute=59, second=59, + calendar=calendar) + + dsFirstYear = dsFirstYear.sel(Time=slice(timeStartFirstYear, + timeEndFirstYear)) + + meanFirstYear = dsFirstYear.mean('Time') +... + yearStart = days_to_datetime(ds.Time.min()).year + yearEnd = days_to_datetime(ds.Time.max()).year + timeStart = date_to_days(year=yearStart, month=1, day=1, + calendar=calendar) + timeEnd = date_to_days(year=yearEnd, month=12, day=31, + calendar=calendar) + + if preprocessedReferenceRunName != 'None': + print ' Load in OHC from preprocessed reference run...' + inFilesPreprocessed = '{}/OHC.{}.year*.nc'.format( + preprocessedInputDirectory, preprocessedReferenceRunName) + dsPreprocessed = open_multifile_dataset( + file_names=inFilesPreprocessed, + calendar=calendar, + simulation_start_time=simulation_start_time, + time_variable_name='xtime') + yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max()).year +... +``` + +The `replicate_cycles` function in `sea_ice.timeseries` has been a particular +challenge with the existing calendar. 
Here is that function with the new 'Time' +coordinate: +```python +def replicate_cycle(ds, dsToReplicate, calendar): + dsStartTime = days_to_datetime(ds.Time.min(), calendar=calendar) + dsEndTime = days_to_datetime(ds.Time.max(), calendar=calendar) + repStartTime = days_to_datetime(dsToReplicate.Time.min(), + calendar=calendar) + repEndTime = days_to_datetime(dsToReplicate.Time.max(), + calendar=calendar) + + repSecondTime = days_to_datetime(dsToReplicate.Time.isel(Time=1), + calendar=calendar) + + period = (MpasRelativeDelta(repEndTime, repStartTime) + + MpasRelativeDelta(repSecondTime, repStartTime)) + + startIndex = 0 + while(dsStartTime > repStartTime + (startIndex+1)*period): + startIndex += 1 + + endIndex = 0 + while(dsEndTime > repEndTime + (endIndex+1)*period): + endIndex += 1 + + dsShift = dsToReplicate.copy() + + times = days_to_datetime(dsShift.Time, calendar=calendar) + dsShift.coords['Time'] = ('Time', + datetime_to_days(times + startIndex*period, + calendar=calendar)) + # replicate cycle: + for cycleIndex in range(startIndex, endIndex): + dsNew = dsToReplicate.copy() + dsNew.coords['Time'] = ('Time', + datetime_to_days(times + (cycleIndex+1)*period, + calendar=calendar)) + dsShift = xr.concat([dsShift, dsNew], dim='Time') + + return dsShift +``` + +

Implementation: The 'Time' coordinate of xarray data sets must support at least years +0001 and 9999, and preferably any conceivable value
+Date last modified: 2017/02/09
+Contributors: Xylar Asay-Davis +

+ +Same as above. + +

Testing

+ +

Testing and Validation: The 'Time' coordinate of xarray data sets must be consistent +with the MPAS calendar
+Date last modified: 2017/02/11
+Contributors: Xylar Asay-Davis +

+In [xylar/generalize_calendar](https://github.com/xylar/MPAS-Analysis/tree/generalize_calendar), +unit testing has been added for `timekeeping` and `mpas_xarray` that checks both the `gregorian` +and `gregorian_noleap` calendars under simple test conditions. However, we have no data sets +that test `gregorian`, so we have a somewhat limited ability to test this calendar option. +Fortunately, there are also no immediate plans to run with `gregorian`. + +I will make sure all tests with config files in the `configs/lanl` and `configs/edison` +directories produce bit-for-bit results with the current `develop`. + +
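For illustration, a round-trip check in the spirit of those unit tests might look like the following (written against the sketch conversion functions shown earlier rather than the actual `timekeeping` utilities):
```python
import datetime


def test_days_datetime_roundtrip():
    date = datetime.datetime(year=2017, month=1, day=1)
    for calendar in ['gregorian', 'gregorian_noleap']:
        days = datetime_to_days(date, calendar=calendar)
        result = days_to_datetime(days, calendar=calendar)
        assert (result.year, result.month, result.day) == (2017, 1, 1)
```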

Testing and Validation: The 'Time' coordinate of xarray data sets must support at least years +0001 and 9999, and preferably any conceivable value
+Date last modified: 2017/02/11
+Contributors: Xylar Asay-Davis +

+ +Unit tests have been added to ensure that dates both close to 0001-01-01 and typical +calendar dates (e.g. 2017-01-01) function as expected. + +@akturner's MPAS-SeaIce run with real dates (mentioned in +[#81](https://github.com/MPAS-Dev/MPAS-Analysis/issues/81)) has been successfully +run with the proposed approach. This run started in 1958, and had presented a problem +for MPAS-Analysis with the previous calendar. diff --git a/design_docs/generalized_horizontal_interpolation.md b/design_docs/generalized_horizontal_interpolation.md new file mode 100644 index 000000000..719b40eed --- /dev/null +++ b/design_docs/generalized_horizontal_interpolation.md @@ -0,0 +1,147 @@ +

Title: Generalized Horizontal Interpolation in MPAS-Analysis
+Xylar Asay-Davis
+date: 01-19-2017
+

+

Summary

+Currently, MPAS-Analysis uses various methods to perform horizontal interpolation. For constructing ocean climatologies, nearest-neighbor interpolation is used, while for sea-ice climatologies, `ncremap` is used with the requirement that a mapping file for the appropriate source and destination grids is provided through the config file. This project intends to move MPAS-Analysis to a unified approach to horizontal interpolation that does not require pre-generated mapping files (though it should support caching mapping files for faster execution). + +Many types of analysis in MPAS will require fields that are interpolated from MPAS grids to arbitrary points, not just to points on a lat/lon grid. This project will not attempt to address that case completely but will take that need into consideration in designing a solution that can be extended to interpolation at arbitrary points in the future. + +

Requirements

+

Requirement: Higher-order interpolation
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+ +The option to interpolate smoothly (e.g. linearly or with barycentric coordinates) between cell-centered values should be added. The calling code should easily be able to select among various orders of interpolation with a flag. + +

Consideration: Interpolation should handle periodic boundaries
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+
+If and when MPAS-Analysis supports planar test cases with periodic boundaries, interpolation should be extended to handle periodic boundaries.
+

Consideration: Interpolation should handle Cartesian meshes
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+
+If and when MPAS-Analysis supports planar test cases with purely Cartesian meshes (e.g. where `latCell` and `lonCell` do not vary), interpolation should be extended to handle Cartesian coordinates.
+

Consideration: Support for arbitrary output interpolation points
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis +

+ +The calling code should be able to supply any desired interpolation points, not just a regular latitude-longitude grid. + +

Consideration: Support caching results from any costly, one-time geometric computations
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+ +For many potential algorithms used to perform interpolation, there is likely to be a relatively costly step of computing fields such as indices into input data fields and interpolation weights that 1) only need to be computed once for a given input mesh and set of output points and 2) are independent of the data in the field being interpolated. If this data were cached, it could mean that rerunning the analysis (which might be very desirable, e.g., while monitoring the progress of a run) would be much cheaper than the initial run. Also, a cached weight file from a previous analysis run could be used when analyzing a subsequent run with identical source meshes. + + + +

Algorithmic Formulations

+ +

Design solution: Higher-order interpolation
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+ +The approach will be to create SCRIP files (or, in the future for greater flexibility perhaps ESMF grid/mesh files) for the source and destination grids, then to use `ESMF_RegridWeightGen` to generate a mapping file. `ESMF_RegridWeightGen` supports 5 interpolation methods---bilinear, patch, nearestdtos, neareststod, and conserve---and we would likely support at least bilinear, neareststod and conserve, and perhaps all 5. The destination grid will be specified either by reading values from `lat` and `lon` coordinates of a NetCDF file or through config file options `lat` and `lon` that are typically expressions involving `numpy.arange` or `numpy.linspace`. + +Then, `ncremap` will be used to remap the desired list of variables from an MPAS NetCDF file to the desired destination grid. + +
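The sketch below illustrates how the two steps might be wired together from Python, assuming SCRIP files for the source and destination grids have already been written. The helper names are invented for this example, and the command-line options shown for `ESMF_RegridWeightGen` and `ncremap` are the commonly documented ones rather than a prescription of what MPAS-Analysis will use.
```python
import subprocess


def build_mapping_file(sourceScripFileName, destinationScripFileName,
                       mappingFileName, method='bilinear'):
    # method may be 'bilinear', 'patch', 'neareststod', 'nearestdtos' or
    # 'conserve'
    subprocess.check_call(['ESMF_RegridWeightGen',
                           '--source', sourceScripFileName,
                           '--destination', destinationScripFileName,
                           '--weight', mappingFileName,
                           '--method', method])


def remap_variables(inFileName, outFileName, mappingFileName, variableList):
    # ncremap applies the precomputed weights to the requested variables
    subprocess.check_call(['ncremap', '-m', mappingFileName,
                           '-v', ','.join(variableList),
                           inFileName, outFileName])
```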

Design solution: Interpolation should handle periodic boundaries
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+ +For now, periodic boundaries (except for the obvious one at +/- 180 longitude) will not be supported. It appears that ESMF grid files do include support for periodic boundaries so the current solution should be relatively easy to extend to periodic boundaries in the future. + +

Design solution: Interpolation should handle Cartesian meshes
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+ +ESMF unstructured mesh files seem to support Cartesian coordinates. This will be investigated if and when MPAS-Analysis can accommodate a test case with Cartesian coordinates. + +

Design solution: Support for arbitrary output interpolation points
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis +

+ +I do not intend to address this consideration in this project. It may be that `ESMF_RegridWeightGen` can also be used to perform interpolation to arbitrary points (in particular, a set of points that are not cell centers or vertices of a mesh), but this is not yet clear to me. If not, an alternative solution for arbitrary destination points will be needed. + +

Design solution: Support caching results from any costly, one-time geometric computations
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+ +This should be relatively easy to accommodate with `ESMF_RegridWeightGen` and `ncremap`. The default behavior of the function for generating interpolation weights will be to do nothing if the mapping file already exists. Further, we can support an optional config option that will point to an existing mapping file if one has already been generated and cached somewhere (e.g. in a shared directory). Eventually, we will probably want to systematically store these mapping files for typical MPAS meshes and typical output grids, particularly for those that are expensive to generate. + +
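A sketch of that caching behavior (the function and argument names here are illustrative, not the MPAS-Analysis API):
```python
import os


def get_mapping_file(configMappingFile, cachedMappingFile, buildMappingFile):
    if configMappingFile:
        # a pre-generated mapping file was supplied via the config file
        return configMappingFile
    if not os.path.exists(cachedMappingFile):
        # pay the cost of ESMF_RegridWeightGen only on the first run
        buildMappingFile(cachedMappingFile)
    return cachedMappingFile
```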

Design and Implementation

+

Implementation: Higher-order interpolation
+Date last modified: 2017/03/04
+Contributors: Xylar Asay-Davis

+
+Implementation is in the branch https://github.com/xylar/MPAS-Analysis/tree/horiz_interp.
+
+`ESMF_RegridWeightGen` is used to compute regridding weights that are 'bilinear', 'neareststod' (nearest neighbor) or 'conserve' (conservative). The regridding method can be chosen separately for MPAS model results, ocean observations and sea-ice observations via the `mpasInterpolationMethod` and `interpolationMethod` flags (see the template: https://github.com/xylar/MPAS-Analysis/blob/horiz_interp/config.template).
+
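For illustration, selecting the methods in a user config file might look like the snippet below; the section placement is a guess here, and the linked `config.template` is authoritative:
```
[climatology]
# interpolation method used when remapping MPAS model results
# ('bilinear', 'neareststod' or 'conserve')
mpasInterpolationMethod = bilinear

[oceanObservations]
# interpolation method used when remapping gridded ocean observations
interpolationMethod = bilinear
```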

Implementation: Interpolation should handle periodic boundaries
+Date last modified: 2017/03/04
+Contributors: Xylar Asay-Davis

+ +Not yet supported. + +

Implementation: Interpolation should handle Cartesian meshes
+Date last modified: 2017/03/04
+Contributors: Xylar Asay-Davis

+ +Not yet supported. + +

Implementation: Support for arbitrary output interpolation points
+Date last modified: 2017/03/04
+Contributors: Xylar Asay-Davis +

+ +Not yet supported. + +

Implementation: Support caching results from any costly, one-time geometric computations
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+ +Mapping files, climatologies and remapped climatologies are cached when they are created. Both mapping files and the directory containing the remapped climatologies from observations can be supplied via the config file, saving the time of computing them. + + + +
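For example, re-using a mapping file cached by a previous analysis run is just a config option (this mirrors the `mpasMappingFile` option shown in the example configs earlier in this change; the path is a placeholder):
```
[climatology]
# re-use a mapping file generated by a previous analysis run rather than
# generating a new one
mpasMappingFile = /path/to/cached/mapping/file.nc
```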

Testing

+

Testing and Validation: Higher-order interpolation
+Date last modified: 2017/03/04
+Contributors: Xylar Asay-Davis

+
+Testing of each of the flags ('bilinear', 'neareststod' and 'conserve') has been performed with the `GMPAS-QU240` run, all of which produce plots that look acceptable. The bilinear and conserve methods leave halos of invalid cells around land, which is consistent with the coarse resolution of this test mesh.
+
+The analysis has also been run on an alpha8 and a beta0 run on Edison. Both ran successfully, but I have not yet had a chance to examine the output.
+

Testing and Validation: Interpolation should handle periodic boundaries
+Date last modified: 2017/03/04
+Contributors: Xylar Asay-Davis

+ +Not yet supported. + +

Testing and Validation: Interpolation should handle Cartesian meshes
+Date last modified: 2017/03/04
+Contributors: Xylar Asay-Davis

+ +Not yet supported. + +

Testing and Validation: Support for arbitrary output interpolation points
+Date last modified: 2017/03/04
+Contributors: Xylar Asay-Davis +

+ +Not yet supported. + +

Testing and Validation: Support caching results from any costly, one-time geometric computations
+Date last modified: 2017/02/25
+Contributors: Xylar Asay-Davis

+ +I have verified that I can rerun without re-computing mapping files or climatologies. Using the `GMPAS-QU240` run, I have verified that I can supply mapping files and remapped observation climatologies without them being re-computed diff --git a/design_docs/template.md b/design_docs/template.md new file mode 100644 index 000000000..ab330559e --- /dev/null +++ b/design_docs/template.md @@ -0,0 +1,64 @@ +

Title: Some descriptive title
+MPAS-Analysis Team
+date: YYYY/MM/DD
+

+

Summary

+The purpose of this section is to summarize what capability is to be added to
+the MPAS-Analysis system through this design process. It should be clear
+what new code will do that the current code does not. Summarizing the primary
+challenges with respect to software design and implementation is also
+appropriate for this section. Finally, this section should include a general
+statement of what constitutes "success."
+

Requirements

+ +

Requirement: name-of-requirement-here
+Date last modified: YYYY/MM/DD
+Contributors: (add your name to this list if it does not appear) +

+Each requirement is to be listed under a "section" heading, as there will be a +one-to-one correspondence between requirements, design, proposed implementation +and testing. Requirements should not discuss technical software issues, but +rather focus on model capability. To the extent possible, requirements should +be relatively independent of each other, thus allowing a clean design solution, +implementation and testing plan. + + +

Algorithmic Formulations (optional)

+

Design solution: short-description-of-proposed-solution-here
+Date last modified: YYYY/MM/DD
+Contributors: (add your name to this list if it does not appear) +

+For each requirement, there is a design solution that is intended to meet that +requirement. Design solutions can include detailed technical discussions of +PDEs, algorithms, solvers and similar, as well as technical discussion of +performance issues. In general, this section should steer away from a detailed +discussion of low-level software issues such as variable declarations, +interfaces and sequencing. + + +

Design and Implementation

+

Implementation: short-description-of-implementation-here
+Date last modified: YYYY/MM/DD
+Contributors: (add your name to this list if it does not appear) +

+This section should detail the plan for implementing the design solution for +requirement XXX. In general, this section is software-centric with a focus on +software implementation. Pseudo code is appropriate in this section. Links to +actual source code are appropriate. Project management items, such as git +branches, timelines and staffing are also appropriate. Pseudo code can be +included via blocks like +```python +def example_function(foo): + return foo**2.0 +``` + +

Testing

+

Testing and Validation: short-description-of-testing-here
+Date last modified: YYYY/MM/DD
+Contributors: (add your name to this list if it does not appear) +

+How will XXX be tested, i.e., how will we know when we have met requirement +XXX? What testing will be included for use with `py.test` for continuous integration? +Describe how any testing that requires off-line or specialized setup will be performed. diff --git a/design_docs/timekeeping_reorg.md b/design_docs/timekeeping_reorg.md new file mode 100644 index 000000000..bb8c7ce5b --- /dev/null +++ b/design_docs/timekeeping_reorg.md @@ -0,0 +1,209 @@ +

Reorganize Timekeeping
+Xylar Asay-Davis
+date: 2017/02/06
+

+

Summary

+Currently, the `Date` class is used to parse a date object from a date string +(e.g. '0001-01-01_00:00:00') taken from MPAS namelists, streams files or time +variables (e.g. `xtime`). However, this class assumes a 365-day calendar and +cannot easily be adapted to the Gregorian calendar also supported by MPAS +components (`config_calendar_type = 'gregorian'`). Furthermore, existing +routines can already handle most of the capabilities +of the `Date` class. The proposed reorganization would eliminate the `Date` class +in favor of a number of helper functions that can be used to convert between various +date formats: date strings, days since a reference date, `datetime.datetime` objects +and `relativedelta` objects (see below). The success of this reorganization will be +demonstrated when the existing analysis can be performed successfully with the new +utility functions with both MPAS calendars, the `'gregorian_noleap'` (365-day) calendar +used by most existing ACME and MPAS runs and the `'gregorian'` calendar also supported +in MPAS components. + + +

Requirements

+ +

Requirement: Date string parsing supports both MPAS calendars
+Date last modified: 2017/02/06
+Contributors: Xylar Asay-Davis +

+ +There must be a way to parse dates from MPAS that is aware of the appropriate calendar +stored in the `config_calendar_type` namelist option, either `'gregorian'` or +`'gregorian_noleap'`. + +

Requirement: Capability of incrementing dates by a number of years and/or months
+Date last modified: 2017/02/06
+Contributors: Xylar Asay-Davis +

+ +The analysis requires a way of incrementing a given date by an interval specified in +not only days, hours, minutes and seconds but also months and years. The standard +`datetime.timedelta` does not support increments by years and months because they are +not fixed periods of time. The existing `Date` class in MPAS-Analysis supports +increments in months and years, but only for the `'gregorian_noleap'` (365-day) calendar. +A method must exist to increment dates on either calendar by a given number of years +and/or months (in addition to days, hours, etc.). + + +
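To make the limitation concrete, here is a minimal illustration (not part of the proposed implementation) using only the standard library and `dateutil`:

```python
import datetime
from dateutil.relativedelta import relativedelta

# timedelta cannot express "one month"; relativedelta can:
print(datetime.datetime(1, 1, 31) + relativedelta(months=1))
# -> 0001-02-28 00:00:00

# ...but relativedelta only knows the Gregorian calendar, so in a (Gregorian)
# leap year it produces Feb 29, a date that must never occur on the
# 'gregorian_noleap' (365-day) calendar:
print(datetime.datetime(4, 1, 31) + relativedelta(months=1))
# -> 0004-02-29 00:00:00
```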

Design and Implementation

+ +

Implementation: Date string parsing supports both MPAS calendars
+Date last modified: 2017/02/06
+Contributors: Xylar Asay-Davis +

+ +The implementation is on the branch: +https://github.com/xylar/MPAS-Analysis/tree/timekeeping_reorg +and in PR #102 + +The function for converting a date string to a `datetime.datetime` is documented as follows: +```python +def stringToDatetime(dateString): + """ + Given a date string and a calendar, returns a `datetime.datetime` + + Parameters + ---------- + dateString : string + A date and time in one of the following formats: + - YYYY-MM-DD hh:mm:ss + - YYYY-MM-DD hh.mm.ss + - YYYY-MM-DD SSSSS + - DDD hh:mm:ss + - DDD hh.mm.ss + - DDD SSSSS + - hh.mm.ss + - hh:mm:ss + - YYYY-MM-DD + - YYYY-MM + - SSSSS + + Note: either underscores or spaces can be used to separate the date + from the time portion of the string. + + Returns + ------- + datetime : A `datetime.datetime` object + + Raises + ------ + ValueError + If an invalid `dateString` is supplied. + + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/04/2017 + """ +``` + +As long as `relativedelta` objects rather than `datetime.timedelta` objects are used to increment +`datetime.datetime` objects, `datetime.datetime` can be used to represent dates on either the Gregorian +or the 365-day calendar. + +
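A brief usage sketch follows; the import path is an assumption to be checked against the branch, but the date-string format is one of those listed in the docstring above:

```python
# assumed import path -- see the timekeeping_reorg branch for the real one
from mpas_analysis.shared.timekeeping.utility import stringToDatetime

# YYYY-MM-DD hh:mm:ss form, with an underscore separating date and time
dt = stringToDatetime('0001-01-01_00:00:00')
print(dt)  # 0001-01-01 00:00:00
```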

Implementation: Capability of incrementing dates by a number of years and/or months
+Date last modified: 2017/02/09
+Contributors: Xylar Asay-Davis +

+ +The implementation is on the branch: +https://github.com/xylar/MPAS-Analysis/tree/timekeeping_reorg +and in PR #102 + +The proposed implementation adds a new class MpasRelativeDelta derived from +`dateutil.relativedelta.relativedelta` to compute the expected +increments in years and months (as well as days, hours, minutes and seconds, as needed). +The class is documented as follows +```python +class MpasRelativeDelta(relativedelta): + """ + MpasRelativeDelta is a subclass of dateutil.relativedelta for relative time + intervals with different MPAS calendars. + + Only relative intervals (years, months, etc.) are supported and not the + absolute date specifications (year, month, etc.). Addition/subtraction + of datetime.datetime objects (but not other MpasRelativeDelta, + datetime.timedelta or other related objects) is supported. + + Author + ------ + Xylar Asay-Davis + + Last Modified + ------------- + 02/09/2017 +``` + +The function for converting a date string to a `MpasRelativeDelta` is documented as follows: +```python +from dateutil.relativedelta import relativedelta +... +def stringToRelativedelta(dateString, calendar='gregorian'): + """ + Given a date string and a calendar, returns an instance of + `MpasRelativeDelta` + + Parameters + ---------- + dateString : string + A date and time in one of the following formats: + - YYYY-MM-DD hh:mm:ss + - YYYY-MM-DD hh.mm.ss + - YYYY-MM-DD SSSSS + - DDD hh:mm:ss + - DDD hh.mm.ss + - DDD SSSSS + - hh.mm.ss + - hh:mm:ss + - YYYY-MM-DD + - YYYY-MM + - SSSSS + + Note: either underscores or spaces can be used to separate the date + from the time portion of the string. + + calendar: {'gregorian', 'gregorian_noleap'}, optional + The name of one of the calendars supported by MPAS cores + + Returns + ------- + relativedelta : An `MpasRelativeDelta` object + + Raises + ------ + ValueError + If an invalid `dateString` is supplied. + + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/04/2017 + """ +``` + +
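The intended behaviour around the leap day can be sketched as follows. This is illustrative only; in particular, the import path and the way the calendar is passed to the constructor are assumptions to be checked against the branch:

```python
import datetime

# assumed import path and constructor signature
from mpas_analysis.shared.timekeeping.MpasRelativeDelta import MpasRelativeDelta

jan31 = datetime.datetime(4, 1, 31)
# Year 4 is a Gregorian leap year, so Jan 31 + 1 month should give Feb 29 on
# the 'gregorian' calendar but clamp to Feb 28 on 'gregorian_noleap'.
print(jan31 + MpasRelativeDelta(months=1, calendar='gregorian'))
print(jan31 + MpasRelativeDelta(months=1, calendar='gregorian_noleap'))
```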

Testing

+ +

Testing and Validation: Date string parsing supports both MPAS calendars
+Date last modified: 2017/02/08
+Contributors: Xylar Asay-Davis +

+Analysis will be run on Edison with all available configurations found in `configs/edison`. As there +are currently no plans to run with the `gregorian` calendar option, we do not have test runs that use this +calendar. If this situation changes in the future, we'll test at that time. + +Regression tests previously written for `Date` have been modified to test the new utility functions. New tests +have been added to test that dates with both `gregorian` and `gregorian_noleap` calendars behave as +expected, particularly around the leap day. + +


Testing and Validation: Capability of incrementing dates by a number of years and/or months
+Date last modified: 2017/02/06
+Contributors: Xylar Asay-Davis +

+ +Same as above. diff --git a/design_docs/variable_mapping_reorg.md b/design_docs/variable_mapping_reorg.md new file mode 100644 index 000000000..72136e696 --- /dev/null +++ b/design_docs/variable_mapping_reorg.md @@ -0,0 +1,228 @@ +

Title: Moving variable mapping outside of mpas_xarray
+Xylar Asay-Davis
+date: 2017/02/10
+

+

Summary

+In discussions with @pwolfram, it became clear that we would like to keep +mpas_xarray as general as possible, rather than adding code specific to +MPAS-Analysis. In particular, the capability for mapping variable names +that is currently part of mpas_xarray is likely a capability that only +MPAS-Analysis will need when opening xarray data sets. Likewise, there is +a desire for mpas_xarray not to use any of the functionality outside of its +own module so that it remains autonomous from MPAS-Analysis. + +At the same time, it is desirable for efficiency and parallelism to perform +certain operations during the preprocessing step within xarray, rather than +constructing a data set first and then (in serial) performing manipulations +(e.g. creating a time coordinate and slicing variables). + +The solution will be tested by making sure it produces bit-for-bit identical +results to those from the develop branch for typical test cases on LANL IC +and Edison. + +

Requirements

+ +

Requirement: mpas_xarray does not include MPAS-Analysis specific +functionality
+Date last modified: 2017/02/10
+Contributors: Xylar Asay-Davis +

+ +MPAS-Analysis specific functionality such as variable mapping should be +removed from mpas_xarray so it can remain an independent module, requiring +minimal modification to accommodate MPAS-Analysis' needs. + +

Requirement: MPAS-Analysis specific functionality should be supported in +xarray preprocessing
+Date last modified: 2017/02/10
+Contributors: Xylar Asay-Davis +

+ +There should be a way to perform MPAS-Analysis specific functionality such as +mapping variables during preprocessing. This functionality should be +relatively easy to add to as new preprocessing needs arise. + + +

Algorithmic Formulations (optional)

+ +

Algorithm: mpas_xarray does not include MPAS-Analysis specific +functionality
+Date last modified: 2017/02/10
+Contributors: Xylar Asay-Davis +

+ +All functions and function arguments related to variable mapping will +be removed from mpas_xarray and moved elsewhere. + +

Algorithm: MPAS-Analysis specific functionality should be supported in +xarray preprocessing
+Date last modified: 2017/02/15
+Contributors: Xylar Asay-Davis +

+ +A new utility function, `open_multifile_dataset` will added to `mpas_xarray` +that simplifies current calls to `xarray.open_mfdataset` to hide the +preprocessor and take care of removing redundant time indices once the dataset +has been built. (This function doesn't directly address the requirement but +is meant to make `mpas_xarray` easier to use and made sense because it +has a one-to-one correspondence with other functionality, described below, +that does address the requirement.) + +A new module, `generalized_reader` will also be added with its own +`open_multifile_dataset` function. This version takes additional arguments +including a variable map and start and end dates for the dataset. +`generalized_reader.open_multifile_dataset` will create a data set +by calling `xarray.open_mfdataset` with its own preprocessing function, +`generalized_reader._preprocess` that first maps variable names, then +calls `mpas_xarray.preprocess` to finish the job. Once the dataset has +been constructed, redundant time indices are removed and the 'Time' +coordinate is sliced to be between the supplied start and end dates. + +This solution may add some confusion in terms of which reader should +be used to open xarray datasets. It is my sense that most developers +adding new functionality will do so by modifying existing scripts, and +these examples should make it clear which version of +`open_multifile_dataset` is most appropriate. Nevertheless, clear +documentation of `generalized_reader` and `mpas_xarray`, and their +differences are needed. + +Here is a typical usage of `generalized_reader.open_multifile_dataset`: +```python +from mpas_analysis.shared.generalized_reader.generalized_reader \ + import open_multifile_dataset + +file_name = 'example_jan_feb.nc' +timestr = ['xtime_start', 'xtime_end'] +var_list = ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] +variable_map = { + 'avgSurfaceTemperature': + ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature', + 'other_string', + 'yet_another_string'], + 'daysSinceStartOfSim': + ['time_avg_daysSinceStartOfSim', + 'xtime', + 'something_else']} +ds = open_multifile_dataset(file_names=file_name, + calendar=calendar, + time_variable_name=timestr, + variable_list=var_list, + start_date='0001-01-01', + end_date='9999-12-31', + variable_map=variable_map, + year_offset=1850) +``` + +Here is the same for `mpas_xarray.open_multifile_dataset` without the +variable map, start and end dates: +```python +from mpas_analysis.shared.mpas_xarray.mpas_xarray \ + import open_multifile_dataset + +file_name = 'example_jan_feb.nc' +timestr = ['xtime_start', 'xtime_end'] +var_list = ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] + +ds = open_multifile_dataset(file_names=file_name, + calendar=calendar, + time_variable_name=timestr, + variable_list=var_list, + year_offset=1850) +``` + + +

Design and Implementation

+ +

Implementation: mpas_xarray does not include MPAS-Analysis specific +functionality
+Date last modified: 2017/02/15
+Contributors: Xylar Asay-Davis +

+ +A test branch can be found here +[xylar/MPAS-Analysis/variable_mapping_reorg](https://github.com/xylar/MPAS-Analysis/tree/variable_mapping_reorg) + +I have removed `map_variable` and `rename_variables` from `mpas_xarray`. +I also removed any mention of the variable map from the rest of `mpas_xarray`. + +This branch also includes several other cleanup operations that are not +addressing any requirements. These include: + - I added a new helper function, `open_multifile_dataset`, for opening an + xarray data set in a single, simple command without reference to the + preprocessor. This function should make opening new data sets more + intuitive for mpas_xarray users. + - making several utility functions non-public (it is unclear to me why anyone + would want to call these directly): + - `_assert_valid_datetimes` + - `_assert_valid_selections` + - `_ensure_list` + - `_get_datetimes` + - I have removed the ability to run `mpas_xarray.py` as a script and the associated + tests. This is on the premise that 1) the tests were outdated and would have + needed to be updated to work with the current code and 2) unit testing in + `test/test_mpas_xarray.py` takes care of this capability in a better way. + - I have tried to make variable names a bit more verbose in various places. + However, at @pwolfram's request, I have left ds for datasets, following the + `xarray` convention. + - I have tried to improve the docstrings using a syntax that should be useful + for generating documentation later on. + - I have updated unit testing to work with the new interface, notably the + `open_multifile_dataset` function. +

Implementation: MPAS-Analysis specific functionality should be supported in +xarray preprocessing
+Date last modified: 2017/02/15
+Contributors: Xylar Asay-Davis +

+ +In the same branch as above, I have added a `generalized_reader` module that +extends the capabilities of `mpas_xarray` to include mapping of variable names. +The file structure is as follows: +``` +mpas_analysis/shared/ + - generalized_reader/ + __init__.py + generalized_reader.py +``` +`generalized_reader.py` contains a function `open_multifile_dataset` that is similar to +the one in `mpas_xarray` but with additional arguments needed by analysis: + - `variable_map`, a map between MPAS and MPAS-Analysis variable names + - `start_date`, the start date of the analysis + - `end_date`, the end date of the analysis +This function performs the same steps as `mpas_xarray.open_multifile_dataset` +but uses the local preprocessing function, `_preprocess`, and also slices +the 'Time' coordinate using the given start and end dates as a final step. + +The `generalized_reader._preprocess` function first maps variable names, then calls +`mpas_xarray.preprocess` to do the rest of the preprocessing as normal. + +Two private functions, `_map_variable_name` and `_rename_variables` (taken out of +`mpas_xarray`) are used to perform variable-name mapping. + +
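As a schematic sketch of the renaming step described above (illustrative only; the actual `_map_variable_name` and `_rename_variables` live in `generalized_reader.py` on the branch), the mapping boils down to something like:

```python
def rename_with_map(ds, variable_map):
    """
    Sketch: rename variables in an xarray.Dataset `ds` from their MPAS names
    to their MPAS-Analysis names. `variable_map` maps each MPAS-Analysis name
    to a list of possible MPAS names, as in the examples earlier in this
    document.
    """
    renames = {}
    for analysisName, possibleMpasNames in variable_map.items():
        for mpasName in possibleMpasNames:
            if mpasName in ds.variables:
                renames[mpasName] = analysisName
                break
    return ds.rename(renames)

# The renamed data set would then be handed to mpas_xarray.preprocess to
# finish the standard preprocessing (building the 'Time' coordinate, etc.).
```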

Testing

+ +

Testing and Validation: mpas_xarray does not include MPAS-Analysis specific +functionality
+Date last modified: 2017/02/15
+Contributors: Xylar Asay-Davis +

+ +In [xylar/MPAS-Analysis/variable_mapping_reorg](https://github.com/xylar/MPAS-Analysis/tree/variable_mapping_reorg), +the unit testing for mpas_xarray has been updated. This includes moving unit testing for +variable mapping elsewhere. + +I will make sure all tests with config files in the `configs/lanl` and `configs/edison` +directories produce bit-for-bit results with the current `develop`. + +

Testing and Validation: MPAS-Analysis specific functionality should be supported in +xarray preprocessing
+Date last modified: 2017/02/10
+Contributors: Xylar Asay-Davis +

+ +Largely, the same as above. + +I have added unit testing for `generalized_reader` (via the standalone +`generalized_reader.open_multifile_dataset` function). These tests ensure that: + - variable mapping works as expected + - start and end dates work as expected diff --git a/mpas_analysis/configuration/MpasAnalysisConfigParser.py b/mpas_analysis/configuration/MpasAnalysisConfigParser.py index fe1cd7ddf..e2c1b6dfb 100644 --- a/mpas_analysis/configuration/MpasAnalysisConfigParser.py +++ b/mpas_analysis/configuration/MpasAnalysisConfigParser.py @@ -1,19 +1,23 @@ -import numbers -import ast - """ A configuratin parser class for MPAS analysis. MpasAnalysisConfigParser adds the capabilities to get an option including a default value (`getWithDefault(section, option, default, ...)`) and to get options that are lists, tuples, dicts, etc (`getExpression(section, option)`). -Author: Xylar Asay-Davis -Last Modified: 12/07/2016 +Author: Xylar Asay-Davis, Phillip J. Wolfram +Last Modified: 02/27/2017 """ +import numbers +import ast +import numpy as np from ConfigParser import ConfigParser +npallow = dict(linspace=np.linspace, xrange=xrange, range=range, + arange=np.arange, pi=np.pi, Pi=np.pi, __builtins__=None) + + class MpasAnalysisConfigParser(ConfigParser): def getWithDefault(self, section, option, default): @@ -25,16 +29,16 @@ def getWithDefault(self, section, option, default): is present in the config file. Author: Xylar Asay-Davis - Last Modified: 12/03/2016 + Last Modified: 02/27/2017 """ if self.has_section(section): if self.has_option(section, option): - if isinstance(default, numbers.Integral): + if isinstance(default, bool): + return self.getboolean(section, option) + elif isinstance(default, numbers.Integral): return self.getint(section, option) elif isinstance(default, numbers.Real): return self.getfloat(section, option) - elif isinstance(default, bool): - return self.getboolean(section, option) elif isinstance(default, (list, tuple, dict)): return self.getExpression(section, option) else: @@ -44,29 +48,47 @@ def getWithDefault(self, section, option, default): self.set(section, option, str(default)) return default - def getExpression(self, section, option, elementType=None): + def getExpression(self, section, option, elementType=None, + usenumpyfunc=False): """ Get an option as an expression (typically a list, though tuples and dicts should also work). `section` and `option` work as in `get(...)`. The expression is required to have valid python syntax, so that string entries are required to be in single or double quotes. + If the option in the section does not exist, returns None. If `elementType` is supplied, each element in a list or tuple, or each value in a dictionary are cast to this type. This is likely most useful for ensuring that all elements in a list of numbers are of type float, rather than int, when the distinction is important. - Author: Xylar Asay-Davis - Last Modified: 12/0y/2016 + If `usenumpyfunc` is True, expression is evaluated within the context + of having selected numpy and / or np functionality available. + + Author: Xylar Asay-Davis, Phillip J. 
Wolfram + Last Modified: 04/10/2017 """ - expressionString = self.get(section, option) - result = ast.literal_eval(expressionString) + if self.has_section(section): + if self.has_option(section, option): + expressionString = self.get(section, option) + if usenumpyfunc: + assert '__' not in expressionString, \ + "'__' is not allowed in {} "\ + "for `usenumpyfunc=True`".format(expressionString) + sanitizedstr = expressionString.replace('np.', '')\ + .replace('numpy.', '')\ + .replace('__', '') + result = eval(sanitizedstr, npallow) + else: + result = ast.literal_eval(expressionString) - if elementType is not None: - if isinstance(result, (list, tuple)): - result = [elementType(element) for element in result] - elif isinstance(result, dict): - for key in result: - result[key] = elementType(result[key]) + if elementType is not None: + if isinstance(result, (list, tuple)): + result = [elementType(element) for element in result] + elif isinstance(result, dict): + for key in result: + result[key] = elementType(result[key]) - return result + return result + else: + return None diff --git a/mpas_analysis/ocean/meridional_overturning_circulation.py b/mpas_analysis/ocean/meridional_overturning_circulation.py new file mode 100644 index 000000000..30177b14a --- /dev/null +++ b/mpas_analysis/ocean/meridional_overturning_circulation.py @@ -0,0 +1,586 @@ +""" +Computation and plotting of model meridional overturning circulation. +Will eventually support: + * MOC streamfunction, post-processed (currently supported) + * MOC streamfunction, from MOC analysis member + * MOC time series (max value at 24.5N), post-processed + * MOC time series (max value at 24.5N), from MOC analysis member + +Authors +------- +Milena Veneziani, Mark Petersen, Phillip Wolfram, Xylar Asay-Davis + +Last Modified +------------- +04/08/2017 +""" + +import xarray as xr +import numpy as np +import netCDF4 +import os +from functools import partial + +from ..shared.constants.constants import m3ps_to_Sv, rad_to_deg, \ + monthDictionary +from ..shared.plot.plotting import plot_vertical_section,\ + timeseries_analysis_plot, setup_colormap + +from ..shared.io.utility import build_config_full_path, make_directories + +from ..shared.generalized_reader.generalized_reader \ + import open_multifile_dataset + +from ..shared.timekeeping.utility import get_simulation_start_time, \ + days_to_datetime + +from ..shared.analysis_task import setup_task, check_analysis_enabled +from ..shared.climatology.climatology import update_start_end_year, \ + cache_climatologies +from ..shared.time_series import time_series + + +def moc_streamfunction(config): # {{{ + """ + Process MOC analysis member data if available, or compute MOC at + post-processing if not. Plots streamfunction climatolgoical sections + as well as time series of max Atlantic MOC at 26.5N (latitude of + RAPID MOC Array). + + Parameters + ---------- + config : instance of MpasAnalysisConfigParser + configuration options used to customize the analysis task + + streamMap : dict, optional + a dictionary of MPAS-O stream names that map to + their mpas_analysis counterparts. + + variableMap : dict, optional + a dictionary of MPAS-O variable names that map + to their mpas_analysis counterparts. + + Authors + ------- + Milena Veneziani, Mark Petersen, Phillip J. 
Wolfram, Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + """ + + # **** Initial settings **** + + # perform common setup for the task + namelist, runStreams, historyStreams, calendar, namelistMap, streamMap, \ + variableMap, plotsDirectory = setup_task(config, componentName='ocean') + + check_analysis_enabled( + namelist=namelist, + analysisOptionName='config_am_timeseriesstatsmonthly_enable', + namelistMap=namelistMap, + raiseException=True) + + # Get a list of timeSeriesStats output files from the streams file, + # reading only those that are between the start and end dates + # First a list necessary for the streamfunctionMOC climatology + streamName = historyStreams.find_stream(streamMap['timeSeriesStats']) + startDateClimo = config.get('climatology', 'startDate') + endDateClimo = config.get('climatology', 'endDate') + inputFilesClimo = historyStreams.readpath(streamName, + startDate=startDateClimo, + endDate=endDateClimo, + calendar=calendar) + simulationStartTime = get_simulation_start_time(runStreams) + + print '\n List of files for climatologies:\n' \ + ' {} through\n {}'.format( + os.path.basename(inputFilesClimo[0]), + os.path.basename(inputFilesClimo[-1])) + + startYearClimo = config.getint('climatology', 'startYear') + endYearClimo = config.getint('climatology', 'endYear') + # Create dictionary to store Climo related variables + dictClimo = {'inputFilesClimo': inputFilesClimo, + 'startDateClimo': startDateClimo, + 'endDateClimo': endDateClimo, + 'startYearClimo': startYearClimo, + 'endYearClimo': endYearClimo} + + # Then a list necessary for the streamfunctionMOC Atlantic timeseries + startDateTseries = config.get('timeSeries', 'startDate') + endDateTseries = config.get('timeSeries', 'endDate') + inputFilesTseries = historyStreams.readpath(streamName, + startDate=startDateTseries, + endDate=endDateTseries, + calendar=calendar) + + print '\n List of files for time series:\n' \ + ' {} through\n {}'.format( + os.path.basename(inputFilesTseries[0]), + os.path.basename(inputFilesTseries[-1])) + + startYearTseries = config.getint('timeSeries', 'startYear') + endYearTseries = config.getint('timeSeries', 'endYear') + # Create dictionary to store Tseries related variables + dictTseries = {'inputFilesTseries': inputFilesTseries, + 'startDateTseries': startDateTseries, + 'endDateTseries': endDateTseries, + 'startYearTseries': startYearTseries, + 'endYearTseries': endYearTseries} + + sectionName = 'streamfunctionMOC' + regionNames = config.getExpression(sectionName, 'regionNames') + + # **** Compute MOC **** + mocAnalysisMemberEnabled = check_analysis_enabled( + namelist=namelist, + analysisOptionName='config_am_mocstreamfunction_enable', + namelistMap=namelistMap, + raiseException=False) + + # Check whether MOC Analysis Member is enabled + if mocAnalysisMemberEnabled: + # Add a moc_analisysMember_processing + print '*** MOC Analysis Member is on ***' + # (mocDictClimo, mocDictTseries) = _compute_moc_analysismember(config, + # streams, calendar, sectionName, dictClimo, dictTseries) + else: + _cache_velocity_climatologies(config, sectionName, + startDateClimo, endDateClimo, + inputFilesClimo, simulationStartTime, + variableMap, calendar) + + # update the start and end year in case they have changed + dictClimo['startYearClimo'] = config.getint('climatology', 'startYear') + dictClimo['endYearClimo'] = config.getint('climatology', 'endYear') + + mocDictClimo, dictRegion = _compute_moc_climo_postprocess( + config, runStreams, variableMap, calendar, sectionName, + regionNames, 
dictClimo) + dsMOCTimeSeries = _compute_moc_time_series_postprocess( + config, runStreams, variableMap, calendar, sectionName, + regionNames, dictTseries, mocDictClimo, dictRegion) + + # **** Plot MOC **** + # Define plotting variables + mainRunName = config.get('runs', 'mainRunName') + movingAveragePoints = config.getint(sectionName, 'movingAveragePoints') + colorbarLabel = '[Sv]' + xLabel = 'latitude [deg]' + yLabel = 'depth [m]' + + for region in regionNames: + print ' Plot climatological {} MOC...'.format(region) + title = '{} MOC (ANN, years {:04d}-{:04d})\n {}'.format( + region, dictClimo['startYearClimo'], + dictClimo['endYearClimo'], + mainRunName) + figureName = '{}/moc{}_{}_years{:04d}-{:04d}.png'.format( + plotsDirectory, region, mainRunName, + dictClimo['startYearClimo'], dictClimo['endYearClimo']) + contourLevels = config.getExpression(sectionName, + 'contourLevels{}'.format(region), + usenumpyfunc=True) + (colormapName, colorbarLevels) = setup_colormap(config, sectionName, + suffix=region) + + x = mocDictClimo['lat{}'.format(region)]['data'] + y = mocDictClimo['depth']['data'] + z = mocDictClimo['moc{}'.format(region)]['data'] + plot_vertical_section(config, x, y, z, colormapName, colorbarLevels, + contourLevels, colorbarLabel, title, + xLabel, yLabel, figureName) + + # Plot time series + print ' Plot time series of max Atlantic MOC at 26.5N...' + xLabel = 'Time [years]' + yLabel = '[Sv]' + title = 'Max Atlantic MOC at $26.5^\circ$N\n {}'.format(mainRunName) + figureName = '{}/mocTimeseries_{}.png'.format(plotsDirectory, + mainRunName) + + timeseries_analysis_plot(config, [dsMOCTimeSeries.mocAtlantic26], + movingAveragePoints, title, + xLabel, yLabel, figureName, + lineStyles=['k-'], lineWidths=[1.5], + calendar=calendar) + # }}} + + +def _load_mesh(runStreams): # {{{ + # Load mesh related variables + try: + restartFile = runStreams.readpath('restart')[0] + except ValueError: + raise IOError('No MPAS-O restart file found: need at least one ' + 'restart file for MOC calculation') + ncFile = netCDF4.Dataset(restartFile, mode='r') + dvEdge = ncFile.variables['dvEdge'][:] + areaCell = ncFile.variables['areaCell'][:] + refBottomDepth = ncFile.variables['refBottomDepth'][:] + latCell = ncFile.variables['latCell'][:] + latCell = latCell * rad_to_deg # convert to degree + ncFile.close() + nVertLevels = len(refBottomDepth) + refTopDepth = np.zeros(nVertLevels+1) + refTopDepth[1:nVertLevels+1] = refBottomDepth[0:nVertLevels] + refLayerThickness = np.zeros(nVertLevels) + refLayerThickness[0] = refBottomDepth[0] + refLayerThickness[1:nVertLevels] = (refBottomDepth[1:nVertLevels] - + refBottomDepth[0:nVertLevels-1]) + + return dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \ + refTopDepth, refLayerThickness # }}} + + +def _cache_velocity_climatologies(config, sectionName, + startDateClimo, endDateClimo, + inputFilesClimo, simulationStartTime, + variableMap, calendar): # {{{ + '''compute yearly velocity climatologies and cache them''' + + variableList = ['avgNormalVelocity', + 'avgVertVelocityTop'] + + outputDirectory = build_config_full_path(config, 'output', + 'mpasClimatologySubdirectory') + + make_directories(outputDirectory) + + chunking = config.getExpression(sectionName, 'maxChunkSize') + ds = open_multifile_dataset(fileNames=inputFilesClimo, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=variableList, + variableMap=variableMap, + startDate=startDateClimo, + endDate=endDateClimo, + 
chunking=chunking) + + # update the start and end year in config based on the real extend of ds + update_start_end_year(ds, config, calendar) + + cachePrefix = '{}/meanVelocity'.format(outputDirectory) + + # compute and cache the velocity climatology + cache_climatologies(ds, monthDictionary['ANN'], + config, cachePrefix, calendar, + printProgress=True) + # }}} + + +def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, + sectionName, regionNames, dictClimo): # {{{ + + '''compute mean MOC streamfunction as a post-process''' + + dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \ + refTopDepth, refLayerThickness = _load_mesh(runStreams) + + # Load basin region related variables and save them to dictionary + # NB: The following will need to change with new regional mapping files + regionMaskFiles = config.get(sectionName, 'regionMaskFiles') + if not os.path.exists(regionMaskFiles): + raise IOError('Regional masking file for MOC calculation ' + 'does not exist') + iRegion = 0 + for region in regionNames: + print '\n Reading region and transect mask for {}...'.format(region) + ncFileRegional = netCDF4.Dataset(regionMaskFiles, mode='r') + maxEdgesInTransect = \ + ncFileRegional.dimensions['maxEdgesInTransect'].size + transectEdgeMaskSigns = \ + ncFileRegional.variables['transectEdgeMaskSigns'][:, iRegion] + transectEdgeGlobalIDs = \ + ncFileRegional.variables['transectEdgeGlobalIDs'][iRegion, :] + regionCellMask = \ + ncFileRegional.variables['regionCellMasks'][:, iRegion] + ncFileRegional.close() + iRegion += 1 + + indRegion = np.where(regionCellMask == 1) + dictRegion = { + 'ind{}'.format(region): indRegion, + '{}CellMask'.format(region): regionCellMask, + 'maxEdgesInTransect{}'.format(region): maxEdgesInTransect, + 'transectEdgeMaskSigns{}'.format(region): transectEdgeMaskSigns, + 'transectEdgeGlobalIDs{}'.format(region): transectEdgeGlobalIDs} + # Add Global regionCellMask=1 everywhere to make the algorithm + # for the global moc similar to that of the regional moc + dictRegion['GlobalCellMask'] = np.ones(np.size(latCell)) + regionNames[:0] = ['Global'] + + # Compute and plot annual climatology of MOC streamfunction + print '\n Compute and/or plot post-processed MOC climatological '\ + 'streamfunction...' + outputDirectory = build_config_full_path(config, 'output', + 'mpasClimatologySubdirectory') + + make_directories(outputDirectory) + + outputFileClimo = '{}/mocStreamfunction_years{:04d}-{:04d}.nc'.format( + outputDirectory, dictClimo['startYearClimo'], + dictClimo['endYearClimo']) + if not os.path.exists(outputFileClimo): + print ' Load data...' + + velClimoFile = '{}/meanVelocity_years{:04d}-{:04d}.nc'.format( + outputDirectory, dictClimo['startYearClimo'], + dictClimo['endYearClimo']) + + annualClimatology = xr.open_dataset(velClimoFile) + + # Convert to numpy arrays + # (can result in a memory error for large array size) + horizontalVel = annualClimatology.avgNormalVelocity.values + verticalVel = annualClimatology.avgVertVelocityTop.values + velArea = verticalVel * areaCell[:, np.newaxis] + + # Create dictionary for MOC climatology (NB: need this form + # in order to convert it to xarray dataset later in the script) + mocDictClimo = {'depth': {'dims': ('nz'), 'data': refTopDepth}} + for region in regionNames: + print ' Compute {} MOC...'.format(region) + print ' Compute transport through region southern transect...' 
+ if region == 'Global': + transportZ = np.zeros(nVertLevels) + else: + maxEdgesInTransect = \ + dictRegion['maxEdgesInTransect{}'.format(region)] + transectEdgeGlobalIDs = \ + dictRegion['transectEdgeGlobalIDs{}'.format(region)] + transectEdgeMaskSigns = \ + dictRegion['transectEdgeMaskSigns{}'.format(region)] + transportZ = _compute_transport(maxEdgesInTransect, + transectEdgeGlobalIDs, + transectEdgeMaskSigns, + nVertLevels, dvEdge, + refLayerThickness, + horizontalVel) + + regionCellMask = dictRegion['{}CellMask'.format(region)] + latBinSize = config.getExpression(sectionName, + 'latBinSize{}'.format(region)) + if region == 'Global': + latBins = np.arange(-90.0, 90.1, latBinSize) + else: + indRegion = dictRegion['ind{}'.format(region)] + latBins = latCell[indRegion] + latBins = np.arange(np.amin(latBins), + np.amax(latBins)+latBinSize, + latBinSize) + mocTop = _compute_moc(latBins, nVertLevels, latCell, + regionCellMask, transportZ, velArea) + + # Store computed MOC to dictionary + mocDictClimo['lat{}'.format(region)] = { + 'dims': ('nx{}'.format(region)), 'data': latBins} + mocDictClimo['moc{}'.format(region)] = { + 'dims': ('nz', 'nx{}'.format(region)), 'data': mocTop} + + # Save to file + print ' Save global and regional MOC to file...' + ncFile = netCDF4.Dataset(outputFileClimo, mode='w') + # create dimensions + ncFile.createDimension('nz', len(refTopDepth)) + for region in regionNames: + latBins = mocDictClimo['lat{}'.format(region)]['data'] + mocTop = mocDictClimo['moc{}'.format(region)]['data'] + ncFile.createDimension('nx{}'.format(region), len(latBins)) + # create variables + x = ncFile.createVariable('lat{}'.format(region), 'f4', + ('nx{}'.format(region),)) + x.description = 'latitude bins for MOC {}'\ + ' streamfunction'.format(region) + x.units = 'degrees (-90 to 90)' + y = ncFile.createVariable('moc{}'.format(region), 'f4', + ('nz', 'nx{}'.format(region))) + y.description = 'MOC {} streamfunction, annual'\ + ' climatology'.format(region) + y.units = 'Sv (10^6 m^3/s)' + # save variables + x[:] = latBins + y[:, :] = mocTop + depth = ncFile.createVariable('depth', 'f4', ('nz',)) + depth.description = 'depth' + depth.units = 'meters' + depth[:] = refTopDepth + ncFile.close() + else: + # Read from file + print ' Read previously computed MOC streamfunction from file...' + ncFile = netCDF4.Dataset(outputFileClimo, mode='r') + refTopDepth = ncFile.variables['depth'][:] + mocDictClimo = {'depth': {'dims': ('nz'), 'data': refTopDepth}} + for region in regionNames: + latBins = ncFile.variables['lat{}'.format(region)][:] + mocTop = ncFile.variables['moc{}'.format(region)][:, :] + mocDictClimo['lat{}'.format(region)] = { + 'dims': ('nx{}'.format(region)), 'data': latBins} + mocDictClimo['moc{}'.format(region)] = { + 'dims': ('nz', 'nx{}'.format(region)), 'data': mocTop} + ncFile.close() + return mocDictClimo, dictRegion # }}} + + +def _compute_moc_time_series_postprocess(config, runStreams, variableMap, + calendar, sectionName, regionNames, + dictTseries, mocDictClimo, + dictRegion): # {{{ + '''compute MOC time series as a post-process''' + + # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array) + print '\n Compute and/or plot post-processed Atlantic MOC '\ + 'time series...' + print ' Load data...' 
+ + simulationStartTime = get_simulation_start_time(runStreams) + variableList = ['avgNormalVelocity', + 'avgVertVelocityTop'] + + dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \ + refTopDepth, refLayerThickness = _load_mesh(runStreams) + + chunking = config.getExpression(sectionName, 'maxChunkSize') + ds = open_multifile_dataset(fileNames=dictTseries['inputFilesTseries'], + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=variableList, + variableMap=variableMap, + startDate=dictTseries['startDateTseries'], + endDate=dictTseries['endDateTseries'], + chunking=chunking) + latAtlantic = mocDictClimo['latAtlantic']['data'] + dLat = latAtlantic - 26.5 + indlat26 = np.where(dLat == np.amin(np.abs(dLat))) + + maxEdgesInTransect = dictRegion['maxEdgesInTransectAtlantic'] + transectEdgeGlobalIDs = dictRegion['transectEdgeGlobalIDsAtlantic'] + transectEdgeMaskSigns = dictRegion['transectEdgeMaskSignsAtlantic'] + regionCellMask = dictRegion['AtlanticCellMask'] + + outputDirectory = build_config_full_path(config, 'output', + 'timeseriesSubdirectory') + try: + os.makedirs(outputDirectory) + except OSError: + pass + + outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory) + + continueOutput = os.path.exists(outputFileTseries) + if continueOutput: + print ' Read in previously computed MOC time series' + + # add all the other arguments to the function + comp_moc_part = partial(_compute_moc_time_series_part, ds, + calendar, areaCell, latCell, indlat26, + maxEdgesInTransect, transectEdgeGlobalIDs, + transectEdgeMaskSigns, nVertLevels, dvEdge, + refLayerThickness, latAtlantic, regionCellMask) + + dsMOCTimeSeries = time_series.cache_time_series( + ds.Time.values, comp_moc_part, outputFileTseries, + calendar, yearsPerCacheUpdate=1, printProgress=False) + + return dsMOCTimeSeries # }}} + + +def _compute_moc_time_series_part(ds, calendar, areaCell, latCell, indlat26, + maxEdgesInTransect, transectEdgeGlobalIDs, + transectEdgeMaskSigns, nVertLevels, dvEdge, + refLayerThickness, latAtlantic, + regionCellMask, timeIndices, firstCall): + # computes a subset of the MOC time series + + if firstCall: + print ' Process and save time series' + + times = ds.Time[timeIndices].values + mocRegion = np.zeros(timeIndices.shape) + + for localIndex, timeIndex in enumerate(timeIndices): + time = times[localIndex] + dsLocal = ds.isel(Time=timeIndex) + date = days_to_datetime(time, calendar=calendar) + + print ' date: {:04d}-{:02d}'.format(date.year, date.month) + + horizontalVel = dsLocal.avgNormalVelocity.values + verticalVel = dsLocal.avgVertVelocityTop.values + velArea = verticalVel * areaCell[:, np.newaxis] + transportZ = _compute_transport(maxEdgesInTransect, + transectEdgeGlobalIDs, + transectEdgeMaskSigns, + nVertLevels, dvEdge, + refLayerThickness, + horizontalVel) + mocTop = _compute_moc(latAtlantic, nVertLevels, latCell, + regionCellMask, transportZ, velArea) + mocRegion[localIndex] = np.amax(mocTop[:, indlat26]) + + description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \ + 'Array latitude (26.5N)' + + dictonary = {'dims': ['Time'], + 'coords': {'Time': + {'dims': ('Time'), + 'data': times, + 'attrs': {'units': 'days since 0001-01-01'}}}, + 'data_vars': {'mocAtlantic26': + {'dims': ('Time'), + 'data': mocRegion, + 'attrs': {'units': 'Sv (10^6 m^3/s)', + 'description': description}}}} + dsMOC = xr.Dataset.from_dict(dictonary) + return dsMOC + + +# def _compute_moc_analysismember(config): +# +# return (mocDictClimo, 
mocDictTseries) + + +def _compute_transport(maxEdgesInTransect, transectEdgeGlobalIDs, + transectEdgeMaskSigns, nz, dvEdge, refLayerThickness, + horizontalVel): # {{{ + + '''compute mass transport across southern transect of ocean basin''' + + transportZEdge = np.zeros([nz, maxEdgesInTransect]) + for i in range(maxEdgesInTransect): + if transectEdgeGlobalIDs[i] == 0: + break + # subtract 1 because of python 0-indexing + iEdge = transectEdgeGlobalIDs[i] - 1 + transportZEdge[:, i] = horizontalVel[iEdge, :] * \ + transectEdgeMaskSigns[iEdge, np.newaxis] * \ + dvEdge[iEdge, np.newaxis] * \ + refLayerThickness[np.newaxis, :] + transportZ = transportZEdge.sum(axis=1) + return transportZ # }}} + + +def _compute_moc(latBins, nz, latCell, regionCellMask, transportZ, + velArea): # {{{ + + '''compute meridionally integrated MOC streamfunction''' + + mocTop = np.zeros([np.size(latBins), nz+1]) + mocTop[0, range(1, nz+1)] = transportZ.cumsum() + for iLat in range(1, np.size(latBins)): + indlat = np.logical_and(np.logical_and( + regionCellMask == 1, latCell >= latBins[iLat-1]), + latCell < latBins[iLat]) + mocTop[iLat, :] = mocTop[iLat-1, :] + velArea[indlat, :].sum(axis=0) + # convert m^3/s to Sverdrup + mocTop = mocTop * m3ps_to_Sv + mocTop = mocTop.T + return mocTop # }}} + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/ocean/nino34_index.py b/mpas_analysis/ocean/nino34_index.py new file mode 100644 index 000000000..48f295c7a --- /dev/null +++ b/mpas_analysis/ocean/nino34_index.py @@ -0,0 +1,357 @@ +""" +Computes NINO34 index and plots the time series and power spectra + +Author +------ +Luke Van Roekel, Xylar Asay-Davis + +Last Modified +------------- +04/10/2017 +""" + +import datetime +import xarray as xr +import pandas as pd +import numpy as np +from scipy import signal, stats +import os + +from ..shared.climatology import climatology +from ..shared.constants import constants +from ..shared.io.utility import build_config_full_path +from ..shared.generalized_reader.generalized_reader \ + import open_multifile_dataset + +from ..shared.timekeeping.utility import get_simulation_start_time + +from ..shared.plot.plotting import nino34_timeseries_plot, nino34_spectra_plot + +from ..shared.analysis_task import setup_task + + +def nino34_index(config, streamMap=None, variableMap=None): # {{{ + """ + Computes NINO34 index and plots the time series and power spectrum with + 95 and 99% confidence bounds + + Parameters + ---------- + config: Instance of MpasAnalysisConfigParser containing configuration + options. + + streamMap: dict, optional + a dictionary of MPAS-O variable names that map to their + mpas_analysis counterparts. + + variableMap : dict, optional + a dictionary of MPAS-O variable names that map + to their mpas_analysis counterparts. + + Author + ------ + Luke Van Roekel, Xylar Asay-Davis + + Last Modified + ------------- + 04/10/2017 + """ + + print ' Load SST data...' 
+ field = 'nino' + + # perform common setup for the task + namelist, runStreams, historyStreams, calendar, namelistMap, streamMap, \ + variableMap, plotsDirectory = setup_task(config, componentName='ocean') + + simulationStartTime = get_simulation_start_time(runStreams) + + # get a list of timeSeriesStats output files from the streams file, + # reading only those that are between the start and end dates + startDate = config.get('index', 'startDate') + endDate = config.get('index', 'endDate') + dataSource = config.get('indexNino34', 'observationData') + + observationsDirectory = build_config_full_path( + config, 'oceanObservations', '{}Subdirectory'.format(field)) + + # specify obsTitle based on data path + # These are the only data sets supported + if dataSource == 'HADIsst': + dataPath = "{}/HADIsst_nino34.nc".format(observationsDirectory) + obsTitle = 'HADSST' + else: + dataPath = "{}/ERS_SSTv4_nino34.nc".format(observationsDirectory) + obsTitle = 'ERS SSTv4' + + streamName = historyStreams.find_stream(streamMap['timeSeriesStats']) + fileNames = historyStreams.readpath(streamName, startDate=startDate, + endDate=endDate, calendar=calendar) + print '\n Reading files:\n' \ + ' {} through\n {}'.format( + os.path.basename(fileNames[0]), + os.path.basename(fileNames[-1])) + mainRunName = config.get('runs', 'mainRunName') + + # regionIndex should correspond to NINO34 in surface weighted Average AM + regionIndex = config.getint('indexNino34', 'regionIndicesToPlot') + + # Load data: + varList = ['avgSurfaceTemperature'] + ds = open_multifile_dataset(fileNames=fileNames, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=varList, + variableMap=variableMap, + startDate=startDate, + endDate=endDate) + + # Observations have been processed to the nino34Index prior to reading + dsObs = xr.open_dataset(dataPath) + nino34Obs = dsObs.sst + + print ' Compute NINO3.4 index...' + regionSST = ds.avgSurfaceTemperature.isel(nOceanRegions=regionIndex) + nino34 = compute_nino34_index(regionSST, calendar) + + # Compute the observational index over the entire time range +# nino34Obs = compute_nino34_index(dsObs.sst, calendar) + + print ' Computing NINO3.4 power spectra...' + f, spectra, conf99, conf95, redNoise = compute_nino34_spectra(nino34) + + # Compute the observational spectra over the whole record + fObs, spectraObs, conf99Obs, conf95Obs, redNoiseObs = compute_nino34_spectra(nino34Obs) + + # Compute the observational spectra over the last 30 years for comparison + # Only saving the spectra + time_start = datetime.datetime(1976, 1, 1) + time_end = datetime.datetime(2016, 12, 31) + nino3430 = nino34Obs.sel(Time=slice(time_start, time_end)) + f30, spectra30yrs, conf9930, conf9530, redNoise30 = compute_nino34_spectra(nino3430) + + # Convert frequencies to period in years + f = 1.0 / (constants.eps + f*constants.sec_per_year) + fObs = 1.0 / (constants.eps + fObs*constants.sec_per_year) + f30 = 1.0 / (constants.eps + f30*constants.sec_per_year) + + print ' Plot NINO3.4 index and spectra...' 
+ + figureName = '{}/NINO34_{}.png'.format(plotsDirectory, mainRunName) + modelTitle = "{}".format(mainRunName) + nino34_timeseries_plot(config, nino34, nino34Obs, nino3430, 'NINO 3.4 Index', + modelTitle, obsTitle, figureName, linewidths=2, + calendar=calendar) + + figureName = '{}/NINO34_spectra_{}.png'.format(plotsDirectory, mainRunName) + nino34_spectra_plot(config, f, spectra, conf95, conf99, redNoise, + fObs, f30, spectraObs, conf95Obs, conf99Obs, redNoiseObs, + spectra30yrs, conf9530, conf9930, redNoise30, + 'NINO3.4 power spectrum', modelTitle, + obsTitle, figureName, linewidths=2) + # }}} + + +def compute_nino34_index(regionSST, calendar): # {{{ + """ + Computes nino34 index time series. It follow the standard nino34 + algorithm, i.e., + + 1. Compute monthly average SST in the region + 2. Computes anomalous SST + 3. Performs a 5 month running mean over the anomalies + + This routine requires regionSST to be the SSTs in the nino3.4 region ONLY. + It is defined as lat > -5S and lat < 5N and lon > 190E and lon < 240E. + + Parameters + ---------- + regionSST : xarray.DataArray object + values of SST in the nino region + + calendar: {'gregorian', 'gregorian_noleap'} + The name of the calendars used in the MPAS run + + Returns + ------- + xarray.DataArray object containing the nino34index + + Author + ------ + Luke Van Roekel, Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + """ + + if not isinstance(regionSST, xr.core.dataarray.DataArray): + raise ValueError('regionSST should be an xarray DataArray') + + # add 'month' data array so we can group by month below. + regionSST = climatology.add_years_months_days_in_month(regionSST, calendar) + + # Compute monthly average and anomaly of climatology of SST + monthlyClimatology = \ + climatology.compute_monthly_climatology(regionSST, maskVaries=False) + + anomaly = regionSST.groupby('month') - monthlyClimatology + + # Remove the long term trend from the anomalies + detrendedAnomal = signal.detrend(anomaly.values) + anomaly.values = detrendedAnomal + + # Compute 5 month running mean + wgts = np.ones(5) / 5. + return _running_mean(anomaly, wgts) # }}} + + +def compute_nino34_spectra(nino34Index): # {{{ + """ + Computes power spectra of Nino34 index. 
+ + nino34Index is the NINO index computed by compute_nino34_index + + The algorithm follows the NCL cvdp package see + http://www.cesm.ucar.edu/working_groups/CVC/cvdp/code.html + + Parameters + ---------- + nino34Index : xarray.DataArray object + nino34Index for analysis + + Returns + ------- + pxxSmooth : xarray.DataArray object + nino34Index power spectra that has been smoothed with a modified + Daniell window (https://www.ncl.ucar.edu/Document/Functions/Built-in/specx_anal.shtml) + + + f : numpy.array + array of frequencies corresponding to the center of the spectral + bins resulting from the analysis + + mkov*scale : numpy.array + Red noise fit to pxxSmooth + + mkov*scale*xLow : numpy.array + 95% confidence threshold from chi-squared test + + mkov*scale*xHigh : numpy.array + 99% confidence threshold from chi-squared test + + Author + ------ + Luke Van Roekel, Xylar Asay-Davis + + Last Modified + ------------- + 04/10/2017 + """ + + # Move nino34Index to numpy to allow functionality with scipy routines + ninoIndex = nino34Index.values + window = signal.tukey(len(ninoIndex), alpha=0.1) + f, Pxx = signal.periodogram(window * ninoIndex, + 1.0 / constants.sec_per_month) + + # computes power spectra, smoothed with a weighted running mean + nwts = max(1, int(7*len(ninoIndex) / 1200)) + # verify window length is odd, if not, add 1 + if nwts % 2 == 0: + nwts += 1 + # Calculate the weights for the running mean + # Weights are from the modified Daniell Window + wgts = np.ones(nwts) + wgts[0] = 0.5 + wgts[-1] = 0.5 + wgts /= sum(wgts) + + pxxSmooth = _running_mean(pd.Series(Pxx), wgts) / constants.sec_per_month + + # compute 99 and 95% confidence intervals and red-noise process + # Uses Chi squared test + + r = _autocorr(ninoIndex)[0, 1] + r2 = 2.*r + rsq = r**2 + + # In the temp2 variable, f is converted to give wavenumber, i.e. + # 0,1,2,...,N/2 + temp2 = r2*np.cos(2.*np.pi*f*constants.sec_per_month) + mkov = 1. / (1. + rsq - temp2) + + sum1 = np.sum(mkov) + sum2 = np.sum(pxxSmooth.values) + scale = sum2 / sum1 + + df = 2. 
/ (constants.tapcoef * sum(wgts**2)) + xLow = stats.chi2.interval(0.95, df)[1]/df + xHigh = stats.chi2.interval(0.99, df)[1]/df + + # return Spectra, 99% confidence level, 95% confidence level, + # and Red-noise fit + return f, pxxSmooth, mkov*scale*xHigh, mkov*scale*xLow, mkov*scale # }}} + + +def _autocorr(x, t=1): # {{{ + """ + Computes lag one auto-correlation for the NINO34 spectra calculation + + Parameters + ---------- + x : numpy 1-D array + time series array + + Returns + ------- + Single value giving the lag one auto-correlation + If t != 1, this is no longer a lag one auto-correlation + + Author + ------ + Luke Van Roekel + + Last Modified + ------------- + 03/22/2017 + """ + + return np.corrcoef(np.array([x[0:len(x)-t], x[t:len(x)]])) # }}} + + +def _running_mean(inputData, wgts): # {{{ + """ + Calculates a generic weighted running mean + + Parameters + ---------- + inputData : xr.DataArray + Data to be smoothed + + wgts : numpy.array + array of weights that give the smoothing type + for the nino index this is a 5-point boxcar window + for the nino power spectra this is a modified Daniell window (see + https://www.ncl.ucar.edu/Document/Functions/Built-in/specx_anal.shtml) + + Author + ------ + Luke Van Roekel, Xylar Asay-Davis + + Last Modified + ------------- + 04/10/2017 + """ + + nt = len(inputData) + sp = (len(wgts) - 1)/2 + runningMean = inputData.copy() + for k in range(sp, nt-(sp+1)): + runningMean[k] = sum(wgts*inputData[k-sp:k+sp+1].values) + + return runningMean # }}} + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/ocean/ocean_modelvsobs.py b/mpas_analysis/ocean/ocean_modelvsobs.py index a459c7175..eb6028e83 100644 --- a/mpas_analysis/ocean/ocean_modelvsobs.py +++ b/mpas_analysis/ocean/ocean_modelvsobs.py @@ -1,274 +1,336 @@ -#!/usr/bin/env python """ General comparison of 2-d model fields against data. Currently only supports -mixed layer depths (mld) and sea surface temperature (sst) +sea surface temperature (sst), sea surface salinity (sss) and mixed layer +depth (mld) -Author: Luke Van Roekel, Milena Veneziani, Xylar Asay-Davis -Last Modified: 12/06/2016 -""" +Authors +------- +Luke Van Roekel, Xylar Asay-Davis, Milena Veneziani -import matplotlib.pyplot as plt -import matplotlib.colors as cols +Last Modified +------------- +04/08/2017 +""" -import numpy as np import xarray as xr import datetime -from netCDF4 import Dataset as netcdf_dataset +import numpy as np +import netCDF4 +import os -from ..shared.mpas_xarray.mpas_xarray import preprocess_mpas, \ - remove_repeated_time_index -from ..shared.plot.plotting import plot_global_comparison -from ..shared.interpolation.interpolate import interp_fields, init_tree +from ..shared.interpolation import interpolate + +from ..shared.plot.plotting import plot_global_comparison, \ + setup_colormap from ..shared.constants import constants -from ..shared.io import StreamsFile +from ..shared.io.utility import build_config_full_path + +from ..shared.generalized_reader.generalized_reader \ + import open_multifile_dataset + +from ..shared.timekeeping.utility import get_simulation_start_time +from ..shared.climatology import climatology -def ocn_modelvsobs(config, field, streamMap=None, variableMap=None): +from ..shared.analysis_task import setup_task + +from ..shared.mpas_xarray import mpas_xarray + + +def ocn_modelvsobs(config, field): """ Plots a comparison of ACME/MPAS output to SST or MLD observations - config is an instance of MpasAnalysisConfigParser containing configuration - options. 
+ Parameters + ---------- + config : instance of MpasAnalysisConfigParser + Contains configuration options - field is the name of a field to be analyize (currently one of 'mld' or - 'sst') + field : {'sst', 'sss', 'mld'} + The name of a field to be analyized - If present, streamMap is a dictionary of MPAS-O stream names that map to - their mpas_analysis counterparts. + Authors + ------- + Luke Van Roekel, Xylar Asay-Davis, Milena Veneziani - If present, variableMap is a dictionary of MPAS-O variable names that map - to their mpas_analysis counterparts. - - Authors: Luke Van Roekel, Milena Veneziani, Xylar Asay-Davis - Modified: 12/08/2016 + Last Modified + ------------- + 04/08/2017 """ - # read parameters from config file - indir = config.get('paths', 'archive_dir_ocn') + # perform common setup for the task + namelist, runStreams, historyStreams, calendar, namelistMap, streamMap, \ + variableMap, plotsDirectory = setup_task(config, componentName='ocean') - streams_filename = config.get('input', 'ocean_streams_filename') - streams = StreamsFile(streams_filename, streamsdir=indir) + simulationStartTime = get_simulation_start_time(runStreams) # get a list of timeSeriesStats output files from the streams file, # reading only those that are between the start and end dates - startDate = config.get('time', 'climo_start_date') - endDate = config.get('time', 'climo_end_date') - streamName = streams.find_stream(streamMap['timeSeriesStats']) - infiles = streams.readpath(streamName, startDate=startDate, - endDate=endDate) - print 'Reading files {} through {}'.format(infiles[0], infiles[-1]) - - plots_dir = config.get('paths', 'plots_dir') - obsdir = config.get('paths', 'obs_' + field + 'dir') - casename = config.get('case', 'casename') - meshfile = config.get('data', 'mpas_meshfile') - climo_yr1 = config.getint('time', 'climo_yr1') - climo_yr2 = config.getint('time', 'climo_yr2') - yr_offset = config.getint('time', 'yr_offset') - - outputTimes = config.getExpression(field + '_modelvsobs', - 'comparisonTimes') - - f = netcdf_dataset(meshfile, mode='r') - lonCell = f.variables["lonCell"][:] - latCell = f.variables["latCell"][:] + startDate = config.get('climatology', 'startDate') + endDate = config.get('climatology', 'endDate') + streamName = historyStreams.find_stream(streamMap['timeSeriesStats']) + inputFiles = historyStreams.readpath(streamName, startDate=startDate, + endDate=endDate, calendar=calendar) + print '\n Reading files:\n' \ + ' {} through\n {}'.format( + os.path.basename(inputFiles[0]), + os.path.basename(inputFiles[-1])) + + observationsDirectory = build_config_full_path( + config, 'oceanObservations', '{}Subdirectory'.format(field)) + mainRunName = config.get('runs', 'mainRunName') + + overwriteMpasClimatology = config.getWithDefault( + 'climatology', 'overwriteMpasClimatology', False) + + overwriteObsClimatology = config.getWithDefault( + 'oceanObservations', 'overwriteObsClimatology', False) + + try: + restartFileName = runStreams.readpath('restart')[0] + except ValueError: + raise IOError('No MPAS-O restart file found: need at least one ' + 'restart file for ocn_modelvsobs calculation') + + sectionName = 'regridded{}'.format(field.upper()) + outputTimes = config.getExpression(sectionName, 'comparisonTimes') + + # get a list of regridded observations files and check if they exist. 
If + # they are all there, we don't have to do anything else with the + # observations + obsFileNames = \ + {'mld': "{}/holtetalley_mld_climatology.nc".format( + observationsDirectory), + 'sst': "{}/MODEL.SST.HAD187001-198110.OI198111-201203.nc".format( + observationsDirectory), + 'sss': "{}/Aquarius_V3_SSS_Monthly.nc".format( + observationsDirectory)} + + obsFileName = obsFileNames[field] + + buildObsClimatologies = overwriteObsClimatology + for months in outputTimes: + (climatologyFileName, regriddedFileName) = \ + climatology.get_observation_climatology_file_names( + config=config, fieldName=field, monthNames=months, + componentName='ocean', gridFileName=obsFileName, + latVarName='lat', lonVarName='lon') + if not os.path.exists(regriddedFileName): + buildObsClimatologies = True + break varList = [field] if field == 'mld': - selvals = None - - # Load MLD observational data - obs_filename = "{}/holtetalley_mld_climatology.nc".format(obsdir) - dsData = xr.open_mfdataset(obs_filename) - - # Increment month value to be consistent with the model output - dsData.iMONTH.values += 1 - - # Rename the time dimension to be consistent with the SST dataset - dsData.rename({'month': 'calmonth'}, inplace=True) - dsData.rename({'iMONTH': 'month'}, inplace=True) + iselvals = None obsFieldName = 'mld_dt_mean' - # Reorder dataset for consistence - dsData = dsData.transpose('month', 'iLON', 'iLAT') + if buildObsClimatologies: + # Load MLD observational data + dsObs = xr.open_mfdataset(obsFileName) + + # Increment month value to be consistent with the model output + dsObs.iMONTH.values += 1 + + # Rename the dimensions to be consistent with other obs. data sets + dsObs.rename({'month': 'calmonth', 'lat': 'latCoord', + 'lon': 'lonCoord'}, inplace=True) + dsObs.rename({'iMONTH': 'Time', 'iLAT': 'lat', 'iLON': 'lon'}, + inplace=True) + # set the coordinates now that the dimensions have the same names + dsObs.coords['lat'] = dsObs['latCoord'] + dsObs.coords['lon'] = dsObs['lonCoord'] + dsObs.coords['Time'] = dsObs['calmonth'] + dsObs.coords['month'] = ('Time', np.array(dsObs['calmonth'], int)) + # no meaningful year since this is already a climatology + dsObs.coords['year'] = ('Time', np.ones(dsObs.dims['Time'], int)) + + dsObs = mpas_xarray.subset_variables(dsObs, [obsFieldName, + 'month']) + # Reorder dataset for consistence with other obs. 
data sets + dsObs = dsObs.transpose('Time', 'lat', 'lon') # Set appropriate MLD figure labels - obsTitleLabel = "Observations (HolteTalley density threshold MLD)" - fileOutLabel = "mldHolteTalleyARGO" + observationTitleLabel = \ + "Observations (HolteTalley density threshold MLD)" + outFileLabel = "mldHolteTalleyARGO" unitsLabel = 'm' elif field == 'sst': - selvals = {'nVertLevels': 0} - - obs_filename = \ - "{}/MODEL.SST.HAD187001-198110.OI198111-201203.nc".format(obsdir) - dsData = xr.open_mfdataset(obs_filename) - # Select years for averaging (pre-industrial or present-day) - # This seems fragile as definitions can change - if yr_offset < 1900: - time_start = datetime.datetime(1870, 1, 1) - time_end = datetime.datetime(1900, 12, 31) - preIndustrial_txt = "pre-industrial 1870-1900" + iselvals = {'nVertLevels': 0} + + climStartYear = config.getint('oceanObservations', + 'sstClimatologyStartYear') + climEndYear = config.getint('oceanObservations', + 'sstClimatologyEndYear') + timeStart = datetime.datetime(year=climStartYear, month=1, day=1) + timeEnd = datetime.datetime(year=climEndYear, month=12, day=31) + + if climStartYear < 1925: + period = 'pre-industrial' else: - time_start = datetime.datetime(1990, 1, 1) - time_end = datetime.datetime(2011, 12, 31) - preIndustrial_txt = "present-day 1990-2011" + period = 'present-day' - ds_tslice = dsData.sel(time=slice(time_start, time_end)) - monthly_clim_data = ds_tslice.groupby('time.month').mean('time') + if buildObsClimatologies: + dsObs = xr.open_mfdataset(obsFileName) + dsObs.rename({'time': 'Time'}, inplace=True) + dsObs = dsObs.transpose('Time', 'lat', 'lon') + dsObs = dsObs.sel(Time=slice(timeStart, timeEnd)) + dsObs.coords['month'] = dsObs['Time.month'] + dsObs.coords['year'] = dsObs['Time.year'] - # Rename the observation data for code compactness - dsData = monthly_clim_data.transpose('month', 'lon', 'lat') obsFieldName = 'SST' # Set appropriate figure labels for SST - obsTitleLabel = \ - "Observations (Hadley/OI, {})".format(preIndustrial_txt) - fileOutLabel = "sstHADOI" + observationTitleLabel = \ + "Observations (Hadley/OI, {} {:04d}-{:04d})".format(period, + climStartYear, + climEndYear) + outFileLabel = "sstHADOI" unitsLabel = r'$^o$C' elif field == 'sss': - selvals = {'nVertLevels': 0} - - obs_filename = "{}/Aquarius_V3_SSS_Monthly.nc".format(obsdir) - dsData = xr.open_mfdataset(obs_filename) - - time_start = datetime.datetime(2011, 8, 1) - time_end = datetime.datetime(2014, 12, 31) - - ds_tslice = dsData.sel(time=slice(time_start, time_end)) + iselvals = {'nVertLevels': 0} - # The following line converts from DASK to numpy to supress an odd - # warning that doesn't influence the figure output - ds_tslice.SSS.values + timeStart = datetime.datetime(2011, 8, 1) + timeEnd = datetime.datetime(2014, 12, 31) - monthly_clim_data = ds_tslice.groupby('time.month').mean('time') + if buildObsClimatologies: + dsObs = xr.open_mfdataset(obsFileName) + dsObs.rename({'time': 'Time'}, inplace=True) + dsObs = dsObs.transpose('Time', 'lat', 'lon') + dsObs = dsObs.sel(Time=slice(timeStart, timeEnd)) + dsObs.coords['month'] = dsObs['Time.month'] + dsObs.coords['year'] = dsObs['Time.year'] - # Rename the observation data for code compactness - dsData = monthly_clim_data.transpose('month', 'lon', 'lat') obsFieldName = 'SSS' - # Set appropriate figure labels for SSS - preIndustrial_txt = "2011-2014" - - obsTitleLabel = "Observations (Aquarius, {})".format(preIndustrial_txt) - fileOutLabel = 'sssAquarius' + observationTitleLabel = "Observations 
(Aquarius, 2011-2014)" + outFileLabel = 'sssAquarius' unitsLabel = 'PSU' - ds = xr.open_mfdataset( - infiles, - preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset, - timestr='Time', - onlyvars=varList, - selvals=selvals, - varmap=variableMap)) - ds = remove_repeated_time_index(ds) - - time_start = datetime.datetime(yr_offset+climo_yr1, 1, 1) - time_end = datetime.datetime(yr_offset+climo_yr2, 12, 31) - ds_tslice = ds.sel(Time=slice(time_start, time_end)) - monthly_clim = ds_tslice.groupby('Time.month').mean('Time') - - latData, lonData = np.meshgrid(dsData.lat.values, dsData.lon.values) - latData = latData.flatten() - lonData = lonData.flatten() - - daysarray = np.ones((12, dsData[obsFieldName].values.shape[1], - dsData[obsFieldName].values.shape[2])) - - for i, dval in enumerate(constants.dinmonth): - daysarray[i, :, :] = dval - inds = np.where(np.isnan(dsData[obsFieldName][i, :, :].values)) - daysarray[i, inds[0], inds[1]] = np.NaN - - # initialize interpolation variables - d2, inds2, lonTarg, latTarg = init_tree(np.rad2deg(lonCell), - np.rad2deg(latCell), - constants.lonmin, - constants.lonmax, - constants.latmin, - constants.latmax, - constants.dLongitude, - constants.dLatitude) - d, inds, lonTargD, latTargD = init_tree(lonData, latData, - constants.lonmin, - constants.lonmax, - constants.latmin, - constants.latmax, - constants.dLongitude, - constants.dLatitude) - nLon = lonTarg.shape[0] - nLat = lonTarg.shape[1] - - modelOutput = np.zeros((len(outputTimes), nLon, nLat)) - observations = np.zeros((len(outputTimes), nLon, nLat)) - bias = np.zeros((len(outputTimes), nLon, nLat)) + ds = open_multifile_dataset(fileNames=inputFiles, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=varList, + iselValues=iselvals, + variableMap=variableMap, + startDate=startDate, + endDate=endDate) + + changed, startYear, endYear = \ + climatology.update_start_end_year(ds, config, calendar) + + mpasMappingFileName = climatology.write_mpas_mapping_file( + config=config, meshFileName=restartFileName) + + if buildObsClimatologies: + obsMappingFileName = \ + climatology.write_observations_mapping_file( + config=config, componentName='ocean', fieldName=field, + gridFileName=obsFileName, latVarName='lat', lonVarName='lon') + else: + obsMappingFileName = None + + (colormapResult, colorbarLevelsResult) = setup_colormap( + config, sectionName, suffix='Result') + (colormapDifference, colorbarLevelsDifference) = setup_colormap( + config, sectionName, suffix='Difference') # Interpolate and compute biases - for i, timestring in enumerate(outputTimes): - monthsvalue = constants.monthdictionary[timestring] - - if isinstance(monthsvalue, (int, long)): - modeldata = monthly_clim.sel(month=monthsvalue)[field].values - obsdata = dsData.sel(month=monthsvalue)[obsFieldName].values - else: - - modeldata = (np.sum( - constants.dinmonth[monthsvalue-1] * - monthly_clim.sel(month=monthsvalue)[field].values.T, axis=1) / - np.sum(constants.dinmonth[monthsvalue-1])) - obsdata = (np.nansum( - daysarray[monthsvalue-1, :, :] * - dsData.sel(month=monthsvalue)[obsFieldName].values, axis=0) / - np.nansum(daysarray[monthsvalue-1, :, :], axis=0)) - - modelOutput[i, :, :] = interp_fields(modeldata, d2, inds2, lonTarg) - observations[i, :, :] = interp_fields(obsdata.flatten(), d, inds, - lonTargD) - - for i in range(len(outputTimes)): - bias[i, :, :] = modelOutput[i, :, :] - observations[i, :, :] - - clevsModelObs = config.getExpression(field + '_modelvsobs', - 
'clevsModelObs') - cmap = plt.get_cmap(config.get(field + '_modelvsobs', - 'cmapModelObs')) - cmapIndices = config.getExpression(field + '_modelvsobs', - 'cmapIndicesModelObs') - cmapModelObs = cols.ListedColormap(cmap(cmapIndices), "cmapModelObs") - clevsDiff = config.getExpression(field + '_modelvsobs', - 'clevsDiff') - cmap = plt.get_cmap(config.get(field + '_modelvsobs', 'cmapDiff')) - cmapIndices = config.getExpression(field + '_modelvsobs', - 'cmapIndicesDiff') - cmapDiff = cols.ListedColormap(cmap(cmapIndices), "cmapDiff") - - for i in range(len(outputTimes)): - fileout = "{}/{}_{}_{}_years{:04d}-{:04d}.png".format( - plots_dir, fileOutLabel, casename, outputTimes[i], climo_yr1, - climo_yr2) + for months in outputTimes: + monthValues = constants.monthDictionary[months] + + (climatologyFileName, climatologyPrefix, regriddedFileName) = \ + climatology.get_mpas_climatology_file_names(config=config, + fieldName=field, + monthNames=months) + + if overwriteMpasClimatology or not os.path.exists(climatologyFileName): + seasonalClimatology = climatology.cache_climatologies( + ds, monthValues, config, climatologyPrefix, calendar, + printProgress=True) + # write out the climatology so we can interpolate it with + # interpolate.remap + seasonalClimatology.to_netcdf(climatologyFileName) + + interpolate.remap(inFileName=climatologyFileName, + outFileName=regriddedFileName, + inWeightFileName=mpasMappingFileName, + sourceFileType='mpas', + overwrite=overwriteMpasClimatology) + + ncFile = netCDF4.Dataset(regriddedFileName, mode='r') + modelOutput = ncFile.variables[field][:] + lons = ncFile.variables["lon"][:] + lats = ncFile.variables["lat"][:] + ncFile.close() + lonTarg, latTarg = np.meshgrid(lons, lats) + + # now the observations + (climatologyFileName, regriddedFileName) = \ + climatology.get_observation_climatology_file_names( + config=config, fieldName=field, monthNames=months, + componentName='ocean', gridFileName=obsFileName, + latVarName='lat', lonVarName='lon') + + if buildObsClimatologies: + if (overwriteObsClimatology or + (not os.path.exists(climatologyFileName) and + not os.path.exists(regriddedFileName))): + seasonalClimatology = climatology.compute_climatology( + dsObs, monthValues, maskVaries=True) + # Either we want to overwite files or neither the climatology + # nor its regridded counterpart exist. 
Write out the + # climatology so we can interpolate it with interpolate.remap + seasonalClimatology.to_netcdf(climatologyFileName) + + if obsMappingFileName is None: + # no remapping is needed + regriddedFileName = climatologyFileName + else: + interpolate.remap(inFileName=climatologyFileName, + outFileName=regriddedFileName, + inWeightFileName=obsMappingFileName, + sourceFileType='latlon', + overwrite=overwriteObsClimatology) + + # read in the results from the remapped files + ncFile = netCDF4.Dataset(regriddedFileName, mode='r') + observations = ncFile.variables[obsFieldName][:] + ncFile.close() + + bias = modelOutput - observations + + outFileName = "{}/{}_{}_{}_years{:04d}-{:04d}.png".format( + plotsDirectory, outFileLabel, mainRunName, + months, startYear, endYear) title = "{} ({}, years {:04d}-{:04d})".format( - field.upper(), outputTimes[i], climo_yr1, climo_yr2) + field.upper(), months, startYear, endYear) plot_global_comparison(config, lonTarg, latTarg, - modelOutput[i, :, :], - observations[i, :, :], - bias[i, :, :], - cmapModelObs, - clevsModelObs, - cmapDiff, - clevsDiff, - fileout=fileout, + modelOutput, + observations, + bias, + colormapResult, + colorbarLevelsResult, + colormapDifference, + colorbarLevelsDifference, + fileout=outFileName, title=title, - modelTitle="{}".format(casename), - obsTitle=obsTitleLabel, + modelTitle="{}".format(mainRunName), + obsTitle=observationTitleLabel, diffTitle="Model-Observations", cbarlabel=unitsLabel) + + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/ocean/ohc_timeseries.py b/mpas_analysis/ocean/ohc_timeseries.py index b6c78634d..6017d407b 100644 --- a/mpas_analysis/ocean/ohc_timeseries.py +++ b/mpas_analysis/ocean/ohc_timeseries.py @@ -1,18 +1,20 @@ import numpy as np import netCDF4 -from netCDF4 import Dataset as netcdf_dataset -import xarray as xr -import pandas as pd -import datetime - -from ..shared.mpas_xarray.mpas_xarray import preprocess_mpas, \ - remove_repeated_time_index +import os from ..shared.plot.plotting import timeseries_analysis_plot -from ..shared.io import NameList, StreamsFile +from ..shared.generalized_reader.generalized_reader \ + import open_multifile_dataset + +from ..shared.timekeeping.utility import get_simulation_start_time, \ + date_to_days, days_to_datetime, string_to_datetime + +from ..shared.analysis_task import setup_task + +from ..shared.time_series import time_series -from ..shared.timekeeping.Date import Date +from ..shared.io.utility import build_config_full_path, make_directories def ohc_timeseries(config, streamMap=None, variableMap=None): @@ -31,65 +33,85 @@ def ohc_timeseries(config, streamMap=None, variableMap=None): to their mpas_analysis counterparts. 
Author: Xylar Asay-Davis, Milena Veneziani - Last Modified: 01/07/2017 + Last Modified: 04/08/2017 """ - # read parameters from config file - casename = config.get('case', 'casename') - ref_casename_v0 = config.get('case', 'ref_casename_v0') - indir_v0data = config.get('paths', 'ref_archive_v0_ocndir') + def compute_ohc_part(timeIndices, firstCall): + dsLocal = ds.isel(Time=timeIndices) + + dsLocal['ohc'] = rho*cp*dsLocal.sumLayerMaskValue * \ + dsLocal.avgLayerArea * dsLocal.avgLayerThickness * \ + dsLocal.avgLayTemperatureAnomaly + dsLocal.ohc.attrs['units'] = 'J' + dsLocal.ohc.attrs['description'] = 'Ocean heat content in each region' + dsLocal['regionNames'] = ('nOceanRegionsTmp', regionNames) - compare_with_obs = config.getboolean('ohc_timeseries', 'compare_with_obs') + return dsLocal - plots_dir = config.get('paths', 'plots_dir') + # perform common setup for the task + namelist, runStreams, historyStreams, calendar, namelistMap, streamMap, \ + variableMap, plotsDirectory = setup_task(config, componentName='ocean') - yr_offset = config.getint('time', 'yr_offset') + simulationStartTime = get_simulation_start_time(runStreams) - N_movavg = config.getint('ohc_timeseries', 'N_movavg') + # read parameters from config file + mainRunName = config.get('runs', 'mainRunName') + preprocessedReferenceRunName = config.get('runs', + 'preprocessedReferenceRunName') + preprocessedInputDirectory = config.get('oceanPreprocessedReference', + 'baseDirectory') + + compareWithObservations = config.getboolean('timeSeriesOHC', + 'compareWithObservations') + + movingAveragePoints = config.getint('timeSeriesOHC', 'movingAveragePoints') regions = config.getExpression('regions', 'regions') - plot_titles = config.getExpression('regions', 'plot_titles') - iregions = config.getExpression('ohc_timeseries', 'regionIndicesToPlot') + plotTitles = config.getExpression('regions', 'plotTitles') + regionIndicesToPlot = config.getExpression('timeSeriesOHC', + 'regionIndicesToPlot') - indir = config.get('paths', 'archive_dir_ocn') + outputDirectory = build_config_full_path(config, 'output', + 'timeseriesSubdirectory') - namelist_filename = config.get('input', 'ocean_namelist_filename') - namelist = NameList(namelist_filename, path=indir) + make_directories(outputDirectory) - streams_filename = config.get('input', 'ocean_streams_filename') - streams = StreamsFile(streams_filename, streamsdir=indir) + regionNames = config.getExpression('regions', 'regions') + regionNames = [regionNames[index] for index in regionIndicesToPlot] # Note: input file, not a mesh file because we need dycore specific fields # such as refBottomDepth and namelist fields such as config_density0, as - # well as simulationStartTime, that are not guaranteed to be in the mesh file. + # well as simulationStartTime, that are not guaranteed to be in the mesh + # file. 
try: - inputfile = streams.readpath('restart')[0] + restartFile = runStreams.readpath('restart')[0] except ValueError: - raise IOError('No MPAS-O restart file found: need at least one restart file for OHC calculation') + raise IOError('No MPAS-O restart file found: need at least one ' + 'restart file for OHC calculation') # get a list of timeSeriesStats output files from the streams file, # reading only those that are between the start and end dates - startDate = config.get('time', 'timeseries_start_date') - endDate = config.get('time', 'timeseries_end_date') - streamName = streams.find_stream(streamMap['timeSeriesStats']) - infiles = streams.readpath(streamName, startDate=startDate, - endDate=endDate) - print 'Reading files {} through {}'.format(infiles[0], infiles[-1]) + startDate = config.get('timeSeries', 'startDate') + endDate = config.get('timeSeries', 'endDate') + streamName = historyStreams.find_stream(streamMap['timeSeriesStats']) + fileNames = historyStreams.readpath(streamName, startDate=startDate, + endDate=endDate, calendar=calendar) + print '\n Reading files:\n' \ + ' {} through\n {}'.format( + os.path.basename(fileNames[0]), + os.path.basename(fileNames[-1])) # Define/read in general variables print ' Read in depth and compute specific depth indexes...' - f = netcdf_dataset(inputfile, mode='r') + ncFile = netCDF4.Dataset(restartFile, mode='r') # reference depth [m] - depth = f.variables['refBottomDepth'][:] - # simulation start time - simStartTime = netCDF4.chartostring(f.variables['simulationStartTime'][:]) - simStartTime = str(simStartTime) - f.close() + depth = ncFile.variables['refBottomDepth'][:] + ncFile.close() # specific heat [J/(kg*degC)] cp = namelist.getfloat('config_specific_heat_sea_water') # [kg/m3] rho = namelist.getfloat('config_density0') - fac = 1e-22*rho*cp + factor = 1e-22 k700m = np.where(depth > 700.)[0][0] - 1 k2000m = np.where(depth > 2000.)[0][0] - 1 @@ -98,135 +120,159 @@ def ohc_timeseries(config, streamMap=None, variableMap=None): # Load data print ' Load ocean data...' 
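The layer-index arithmetic a few lines above is easy to misread: `refBottomDepth` holds the bottom depth of each vertical layer, and `np.where(depth > 700.)[0][0] - 1` picks the deepest layer whose bottom still lies above 700 m. A minimal sketch with made-up depths (the real values come from the MPAS-O restart file):

```
import numpy as np

# Hypothetical layer bottom depths [m], standing in for refBottomDepth
depth = np.array([10., 50., 150., 400., 690., 710., 1500., 1950., 2100., 4000.])

# The first index deeper than the threshold, minus one, is the deepest layer
# whose bottom is still shallower than the threshold
k700m = np.where(depth > 700.)[0][0] - 1    # -> 4, i.e. the 690 m layer
k2000m = np.where(depth > 2000.)[0][0] - 1  # -> 7, i.e. the 1950 m layer
```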
- varList = ['avgLayerTemperature', - 'sumLayerMaskValue', - 'avgLayerArea', - 'avgLayerThickness'] - ds = xr.open_mfdataset( - infiles, - preprocess=lambda x: preprocess_mpas(x, - yearoffset=yr_offset, - timestr='Time', - onlyvars=varList, - varmap=variableMap)) - - ds = remove_repeated_time_index(ds) - - # convert the start and end dates to datetime objects using - # the Date class, which ensures the results are within the - # supported range - time_start = Date(startDate).to_datetime(yr_offset) - time_end = Date(endDate).to_datetime(yr_offset) - # select only the data in the specified range of years - ds = ds.sel(Time=slice(time_start, time_end)) + variableList = ['avgLayerTemperature', + 'sumLayerMaskValue', + 'avgLayerArea', + 'avgLayerThickness'] + ds = open_multifile_dataset(fileNames=fileNames, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=variableList, + variableMap=variableMap, + startDate=startDate, + endDate=endDate) + + ds = ds.isel(nOceanRegionsTmp=regionIndicesToPlot) + + timeStart = string_to_datetime(startDate) + timeEnd = string_to_datetime(endDate) # Select year-1 data and average it (for later computing anomalies) - time_start_yr1 = Date(simStartTime).to_datetime(yr_offset) - if time_start_yr1 < time_start: - startDate_yr1 = simStartTime - endDate_yr1 = startDate_yr1[0:5]+'12-31'+startDate_yr1[10:] - infiles_yr1 = streams.readpath(streamName, startDate=startDate_yr1, - endDate=endDate_yr1) - ds_yr1 = xr.open_mfdataset( - infiles_yr1, - preprocess=lambda x: preprocess_mpas(x, - yearoffset=yr_offset, - timestr='Time', - onlyvars=varList, - varmap=variableMap)) - - ds_yr1 = remove_repeated_time_index(ds_yr1) + timeStartFirstYear = string_to_datetime(simulationStartTime) + if timeStartFirstYear < timeStart: + startDateFirstYear = simulationStartTime + firstYear = int(startDateFirstYear[0:4]) + endDateFirstYear = '{:04d}-12-31_23:59:59'.format(firstYear) + filesFirstYear = historyStreams.readpath(streamName, + startDate=startDateFirstYear, + endDate=endDateFirstYear, + calendar=calendar) + dsFirstYear = open_multifile_dataset( + fileNames=filesFirstYear, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=['avgLayerTemperature'], + variableMap=variableMap, + startDate=startDateFirstYear, + endDate=endDateFirstYear) + + dsFirstYear = dsFirstYear.isel(nOceanRegionsTmp=regionIndicesToPlot) + + firstYearAvgLayerTemperature = dsFirstYear.avgLayerTemperature else: - time_start = datetime.datetime(time_start.year, 1, 1) - time_end = datetime.datetime(time_start.year, 12, 31) - ds_yr1 = ds.sel(Time=slice(time_start, time_end)) - mean_yr1 = ds_yr1.mean('Time') + firstYearAvgLayerTemperature = ds.avgLayerTemperature + firstYear = timeStart.year + + timeStartFirstYear = date_to_days(year=firstYear, month=1, day=1, + calendar=calendar) + timeEndFirstYear = date_to_days(year=firstYear, month=12, day=31, + hour=23, minute=59, second=59, + calendar=calendar) + + firstYearAvgLayerTemperature = firstYearAvgLayerTemperature.sel( + Time=slice(timeStartFirstYear, timeEndFirstYear)) + + firstYearAvgLayerTemperature = firstYearAvgLayerTemperature.mean('Time') print ' Compute temperature anomalies...' 
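The step that follows removes the year-1 mean from the full temperature series to form the anomaly that enters the heat-content integrand. A self-contained sketch of that step with synthetic data (the real code selects the first simulated year through the calendar-aware `Time` coordinate rather than by position):

```
import numpy as np
import xarray as xr

# Synthetic stand-in for avgLayerTemperature with dims (Time, nVertLevels)
temperature = xr.DataArray(np.random.rand(24, 5), dims=('Time', 'nVertLevels'))

# The mean over the first 12 time slices plays the role of the year-1 average
firstYearMean = temperature.isel(Time=slice(0, 12)).mean('Time')

# Subtraction broadcasts the (nVertLevels,) mean across Time, giving the
# anomaly relative to the start of the simulation
temperatureAnomaly = temperature - firstYearMean
```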
- avgLayerTemperature = ds.avgLayerTemperature - avgLayerTemperature_yr1 = mean_yr1.avgLayerTemperature - - avgLayTemp_anomaly = avgLayerTemperature - avgLayerTemperature_yr1 - - year_start = (pd.to_datetime(ds.Time.min().values)).year - year_end = (pd.to_datetime(ds.Time.max().values)).year - time_start = datetime.datetime(year_start, 1, 1) - time_end = datetime.datetime(year_end, 12, 31) - - if ref_casename_v0 != 'None': - print ' Load in OHC for ACMEv0 case...' - infiles_v0data = '{}/OHC.{}.year*.nc'.format( - indir_v0data, ref_casename_v0) - ds_v0 = xr.open_mfdataset( - infiles_v0data, - preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset)) - ds_v0 = remove_repeated_time_index(ds_v0) - year_end_v0 = (pd.to_datetime(ds_v0.Time.max().values)).year - if year_start <= year_end_v0: - ds_v0_tslice = ds_v0.sel(Time=slice(time_start, time_end)) - else: - print ' Warning: v0 time series lies outside current bounds of v1 time series. Skipping it.' - ref_casename_v0 = 'None' - - sumLayerMaskValue = ds.sumLayerMaskValue - avgLayerArea = ds.avgLayerArea - avgLayerThickness = ds.avgLayerThickness + + ds['avgLayTemperatureAnomaly'] = (ds.avgLayerTemperature - + firstYearAvgLayerTemperature) + + yearStart = days_to_datetime(ds.Time.min(), calendar=calendar).year + yearEnd = days_to_datetime(ds.Time.max(), calendar=calendar).year + timeStart = date_to_days(year=yearStart, month=1, day=1, + calendar=calendar) + timeEnd = date_to_days(year=yearEnd, month=12, day=31, + calendar=calendar) + + if preprocessedReferenceRunName != 'None': + print ' Load in OHC from preprocessed reference run...' + inFilesPreprocessed = '{}/OHC.{}.year*.nc'.format( + preprocessedInputDirectory, preprocessedReferenceRunName) + dsPreprocessed = open_multifile_dataset( + fileNames=inFilesPreprocessed, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='xtime') + yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max(), + calendar=calendar).year + if yearStart <= yearEndPreprocessed: + dsPreprocessedTimeSlice = dsPreprocessed.sel(Time=slice(timeStart, + timeEnd)) + else: + print ' Warning: Preprocessed time series ends before the ' \ + 'timeSeries startYear and will not be plotted.' + preprocessedReferenceRunName = 'None' + + cacheFileName = '{}/ohcTimeSeries.nc'.format(outputDirectory) + + dsOHC = time_series.cache_time_series(ds.Time.values, compute_ohc_part, + cacheFileName, calendar, + yearsPerCacheUpdate=10, + printProgress=True) print ' Compute OHC and make plots...' 
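For orientation, the quantity cached above is `rho*cp*mask*area*thickness*anomaly` in joules per layer and region, and `factor = 1e-22` converts the vertical sums below into the 10^22 J units used on the plot axis. A back-of-the-envelope check with assumed, purely illustrative values (the real `rho` and `cp` are read from `config_density0` and `config_specific_heat_sea_water` in the namelist):

```
# Illustrative numbers only; none of these values come from the namelist
rho = 1026.0       # kg m^-3, assumed reference density
cp = 3996.0        # J kg^-1 degC^-1, assumed specific heat of sea water
area = 3.6e14      # m^2, roughly the global ocean surface area
thickness = 100.0  # m, a single 100 m layer
anomaly = 0.1      # degC warming relative to year 1

ohcJoules = rho * cp * anomaly * area * thickness  # ~1.5e22 J
ohcPlotted = 1e-22 * ohcJoules                     # ~1.5 on the plot's y axis
```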
- for index in range(len(iregions)): - iregion = iregions[index] + for index, regionIndex in enumerate(regionIndicesToPlot): - # Compute volume of each layer in the region: - layerArea = sumLayerMaskValue[:, iregion, :] * \ - avgLayerArea[:, iregion, :] - layerVolume = layerArea * avgLayerThickness[:, iregion, :] + ohc = dsOHC.ohc.isel(nOceanRegionsTmp=index) - # Compute OHC: - ohc = layerVolume * avgLayTemp_anomaly[:, iregion, :] # OHC over 0-bottom depth range: - ohc_tot = ohc.sum('nVertLevels') - ohc_tot = fac*ohc_tot + ohcTotal = ohc.sum('nVertLevels') + ohcTotal = factor*ohcTotal # OHC over 0-700m depth range: - ohc_700m = fac*ohc[:, 0:k700m].sum('nVertLevels') + ohc700m = factor*ohc[:, 0:k700m].sum('nVertLevels') # OHC over 700m-2000m depth range: - ohc_2000m = fac*ohc[:, k700m+1:k2000m].sum('nVertLevels') + ohc2000m = factor*ohc[:, k700m+1:k2000m].sum('nVertLevels') # OHC over 2000m-bottom depth range: - ohc_btm = ohc[:, k2000m+1:kbtm].sum('nVertLevels') - ohc_btm = fac*ohc_btm + ohcBottom = ohc[:, k2000m+1:kbtm].sum('nVertLevels') + ohcBottom = factor*ohcBottom title = 'OHC, {}, 0-bottom (thick-), 0-700m (thin-), 700-2000m (--),' \ - ' 2000m-bottom (-.) \n {}'.format(plot_titles[iregion], casename) - - xlabel = 'Time [years]' - ylabel = '[x$10^{22}$ J]' - - if ref_casename_v0 != 'None': - figname = '{}/ohc_{}_{}_{}.png'.format(plots_dir, - regions[iregion], - casename, - ref_casename_v0) - ohc_v0_tot = ds_v0_tslice.ohc_tot - ohc_v0_700m = ds_v0_tslice.ohc_700m - ohc_v0_2000m = ds_v0_tslice.ohc_2000m - ohc_v0_btm = ds_v0_tslice.ohc_btm - title = '{} (r), {} (b)'.format(title, ref_casename_v0) - timeseries_analysis_plot(config, [ohc_tot, ohc_700m, ohc_2000m, - ohc_btm, ohc_v0_tot, ohc_v0_700m, - ohc_v0_2000m, ohc_v0_btm], - N_movavg, title, xlabel, ylabel, figname, + ' 2000m-bottom (-.) 
\n {}'.format(plotTitles[regionIndex], + mainRunName) + + xLabel = 'Time [years]' + yLabel = '[x$10^{22}$ J]' + + figureName = '{}/ohc_{}_{}.png'.format(plotsDirectory, + regions[regionIndex], + mainRunName) + + if preprocessedReferenceRunName != 'None': + ohcPreprocessedTotal = dsPreprocessedTimeSlice.ohc_tot + ohcPreprocessed700m = dsPreprocessedTimeSlice.ohc_700m + ohcPreprocessed2000m = dsPreprocessedTimeSlice.ohc_2000m + ohcPreprocessedBottom = dsPreprocessedTimeSlice.ohc_btm + title = '{} (r), {} (b)'.format(title, + preprocessedReferenceRunName) + timeseries_analysis_plot(config, [ohcTotal, ohc700m, ohc2000m, + ohcBottom, ohcPreprocessedTotal, + ohcPreprocessed700m, + ohcPreprocessed2000m, + ohcPreprocessedBottom], + movingAveragePoints, title, + xLabel, yLabel, figureName, lineStyles=['r-', 'r-', 'r--', 'r-.', 'b-', 'b-', 'b--', 'b-.'], lineWidths=[2, 1, 1.5, 1.5, 2, 1, 1.5, - 1.5]) - - if not compare_with_obs and ref_casename_v0 == 'None': - figname = '{}/ohc_{}_{}.png'.format(plots_dir, regions[iregion], - casename) - timeseries_analysis_plot(config, [ohc_tot, ohc_700m, ohc_2000m, - ohc_btm], - N_movavg, title, xlabel, ylabel, figname, + 1.5], + calendar=calendar) + + if (not compareWithObservations and + preprocessedReferenceRunName == 'None'): + timeseries_analysis_plot(config, [ohcTotal, ohc700m, ohc2000m, + ohcBottom], + movingAveragePoints, title, + xLabel, yLabel, figureName, lineStyles=['r-', 'r-', 'r--', 'r-.'], - lineWidths=[2, 1, 1.5, 1.5]) + lineWidths=[2, 1, 1.5, 1.5], + calendar=calendar) diff --git a/mpas_analysis/ocean/sst_timeseries.py b/mpas_analysis/ocean/sst_timeseries.py index c59115030..58ea5a0b3 100644 --- a/mpas_analysis/ocean/sst_timeseries.py +++ b/mpas_analysis/ocean/sst_timeseries.py @@ -1,15 +1,18 @@ -import xarray as xr -import pandas as pd -import datetime - -from ..shared.mpas_xarray.mpas_xarray import preprocess_mpas, \ - remove_repeated_time_index +import os from ..shared.plot.plotting import timeseries_analysis_plot -from ..shared.io import StreamsFile +from ..shared.generalized_reader.generalized_reader \ + import open_multifile_dataset + +from ..shared.timekeeping.utility import get_simulation_start_time, \ + date_to_days, days_to_datetime + +from ..shared.analysis_task import setup_task + +from ..shared.time_series import time_series -from ..shared.timekeeping.Date import Date +from ..shared.io.utility import build_config_full_path, make_directories def sst_timeseries(config, streamMap=None, variableMap=None): @@ -27,104 +30,129 @@ def sst_timeseries(config, streamMap=None, variableMap=None): to their mpas_analysis counterparts. Author: Xylar Asay-Davis, Milena Veneziani - Last Modified: 12/05/2016 + Last Modified: 04/08/2017 """ - # Define/read in general variables + def compute_sst_part(timeIndices, firstCall): + dsLocal = ds.isel(Time=timeIndices) + return dsLocal + print ' Load SST data...' 
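`compute_sst_part` (like `compute_ohc_part` earlier in this patch) is a closure handed to `time_series.cache_time_series` further down. Judging from the calls introduced here, the caching helper passes in whichever integer `Time` indices are not yet in the cached file, so the callback only has to subset the already-open dataset and return it. A self-contained sketch of the pattern, with a synthetic dataset and hand-rolled stand-ins for the chunked calls the helper presumably makes:

```
import numpy as np
import xarray as xr

ds = xr.Dataset({'avgSurfaceTemperature':
                 (('Time', 'nOceanRegions'), np.random.rand(36, 3))})

def compute_sst_part(timeIndices, firstCall):
    # Return the subset of the series for the requested indices; any derived
    # variables would be added to dsLocal here before returning
    dsLocal = ds.isel(Time=timeIndices)
    return dsLocal

# Stand-ins for the incremental calls made while building the cache
firstChunk = compute_sst_part(np.arange(0, 12), True)
secondChunk = compute_sst_part(np.arange(12, 24), False)
```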
- # read parameters from config file - indir = config.get('paths', 'archive_dir_ocn') - streams_filename = config.get('input', 'ocean_streams_filename') - streams = StreamsFile(streams_filename, streamsdir=indir) + # perform common setup for the task + namelist, runStreams, historyStreams, calendar, namelistMap, streamMap, \ + variableMap, plotsDirectory = setup_task(config, componentName='ocean') + + simulationStartTime = get_simulation_start_time(runStreams) # get a list of timeSeriesStats output files from the streams file, # reading only those that are between the start and end dates - startDate = config.get('time', 'timeseries_start_date') - endDate = config.get('time', 'timeseries_end_date') - streamName = streams.find_stream(streamMap['timeSeriesStats']) - infiles = streams.readpath(streamName, startDate=startDate, - endDate=endDate) - print 'Reading files {} through {}'.format(infiles[0], infiles[-1]) - - casename = config.get('case', 'casename') - ref_casename_v0 = config.get('case', 'ref_casename_v0') - indir_v0data = config.get('paths', 'ref_archive_v0_ocndir') + startDate = config.get('timeSeries', 'startDate') + endDate = config.get('timeSeries', 'endDate') + streamName = historyStreams.find_stream(streamMap['timeSeriesStats']) + fileNames = historyStreams.readpath(streamName, startDate=startDate, + endDate=endDate, calendar=calendar) + print '\n Reading files:\n' \ + ' {} through\n {}'.format( + os.path.basename(fileNames[0]), + os.path.basename(fileNames[-1])) + + mainRunName = config.get('runs', 'mainRunName') + preprocessedReferenceRunName = config.get('runs', + 'preprocessedReferenceRunName') + preprocessedInputDirectory = config.get('oceanPreprocessedReference', + 'baseDirectory') + + movingAveragePoints = config.getint('timeSeriesSST', 'movingAveragePoints') - plots_dir = config.get('paths', 'plots_dir') + regions = config.getExpression('regions', 'regions') + plotTitles = config.getExpression('regions', 'plotTitles') + regionIndicesToPlot = config.getExpression('timeSeriesSST', + 'regionIndicesToPlot') - yr_offset = config.getint('time', 'yr_offset') + outputDirectory = build_config_full_path(config, 'output', + 'timeseriesSubdirectory') - N_movavg = config.getint('sst_timeseries', 'N_movavg') + make_directories(outputDirectory) - regions = config.getExpression('regions', 'regions') - plot_titles = config.getExpression('regions', 'plot_titles') - iregions = config.getExpression('sst_timeseries', 'regionIndicesToPlot') + regionNames = config.getExpression('regions', 'regions') + regionNames = [regionNames[index] for index in regionIndicesToPlot] # Load data: varList = ['avgSurfaceTemperature'] - ds = xr.open_mfdataset( - infiles, - preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset, - timestr='Time', - onlyvars=varList, - varmap=variableMap)) - ds = remove_repeated_time_index(ds) - - # convert the start and end dates to datetime objects using - # the Date class, which ensures the results are within the - # supported range - time_start = Date(startDate).to_datetime(yr_offset) - time_end = Date(endDate).to_datetime(yr_offset) - # select only the data in the specified range of years - ds = ds.sel(Time=slice(time_start, time_end)) - - SSTregions = ds.avgSurfaceTemperature - - year_start = (pd.to_datetime(ds.Time.min().values)).year - year_end = (pd.to_datetime(ds.Time.max().values)).year - time_start = datetime.datetime(year_start, 1, 1) - time_end = datetime.datetime(year_end, 12, 31) - - if ref_casename_v0 != 'None': - print ' Load in SST for ACMEv0 case...' 
- infiles_v0data = '{}/SST.{}.year*.nc'.format(indir_v0data, - ref_casename_v0) - ds_v0 = xr.open_mfdataset( - infiles_v0data, - preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset)) - ds_v0 = remove_repeated_time_index(ds_v0) - year_end_v0 = (pd.to_datetime(ds_v0.Time.max().values)).year - if year_start <= year_end_v0: - ds_v0_tslice = ds_v0.sel(Time=slice(time_start, time_end)) + ds = open_multifile_dataset(fileNames=fileNames, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=varList, + variableMap=variableMap, + startDate=startDate, + endDate=endDate) + + ds = ds.isel(nOceanRegions=regionIndicesToPlot) + + yearStart = days_to_datetime(ds.Time.min(), calendar=calendar).year + yearEnd = days_to_datetime(ds.Time.max(), calendar=calendar).year + timeStart = date_to_days(year=yearStart, month=1, day=1, + calendar=calendar) + timeEnd = date_to_days(year=yearEnd, month=12, day=31, + calendar=calendar) + + if preprocessedReferenceRunName != 'None': + print ' Load in SST for a preprocesses reference run...' + inFilesPreprocessed = '{}/SST.{}.year*.nc'.format( + preprocessedInputDirectory, preprocessedReferenceRunName) + dsPreprocessed = open_multifile_dataset( + fileNames=inFilesPreprocessed, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='xtime') + yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max(), + calendar=calendar).year + if yearStart <= yearEndPreprocessed: + dsPreprocessedTimeSlice = \ + dsPreprocessed.sel(Time=slice(timeStart, timeEnd)) else: - print ' Warning: v0 time series lies outside current bounds of v1 time series. Skipping it.' - ref_casename_v0 = 'None' + print ' Warning: Preprocessed time series ends before the ' \ + 'timeSeries startYear and will not be plotted.' + preprocessedReferenceRunName = 'None' + + cacheFileName = '{}/sstTimeSeries.nc'.format(outputDirectory) + + dsSST = time_series.cache_time_series(ds.Time.values, compute_sst_part, + cacheFileName, calendar, + yearsPerCacheUpdate=10, + printProgress=True) print ' Make plots...' 
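`movingAveragePoints` (formerly `N_movavg`) sets the width of the running mean drawn by `timeseries_analysis_plot`. The plotting helper itself is not part of this hunk, so the sketch below only illustrates what an N-point centered window does to a monthly series, assuming standard rolling-mean behaviour:

```
import numpy as np
import xarray as xr

# Noisy synthetic monthly series standing in for one region's SST
sst = xr.DataArray(20. + np.random.randn(120), dims='Time',
                   name='avgSurfaceTemperature')

# A 12-point centered running mean of the kind movingAveragePoints controls
smoothed = sst.rolling(Time=12, center=True).mean()
```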
- for index in range(len(iregions)): - iregion = iregions[index] + for index, regionIndex in enumerate(regionIndicesToPlot): + + title = plotTitles[regionIndex] + title = 'SST, %s, %s (r-)' % (title, mainRunName) + xLabel = 'Time [years]' + yLabel = '[$^\circ$ C]' + + SST = dsSST.avgSurfaceTemperature.isel(nOceanRegions=index) - title = plot_titles[iregion] - title = 'SST, %s, %s (r-)' % (title, casename) - xlabel = 'Time [years]' - ylabel = '[$^\circ$ C]' - SST = SSTregions[:, iregion] + figureName = '{}/sst_{}_{}.png'.format(plotsDirectory, + regions[regionIndex], + mainRunName) - if ref_casename_v0 != 'None': - figname = '{}/sst_{}_{}_{}.png'.format(plots_dir, regions[iregion], - casename, ref_casename_v0) - SST_v0 = ds_v0_tslice.SST + if preprocessedReferenceRunName != 'None': + SST_v0 = dsPreprocessedTimeSlice.SST - title = '{}\n {} (b-)'.format(title, ref_casename_v0) - timeseries_analysis_plot(config, [SST, SST_v0], N_movavg, - title, xlabel, ylabel, figname, + title = '{}\n {} (b-)'.format(title, preprocessedReferenceRunName) + timeseries_analysis_plot(config, [SST, SST_v0], + movingAveragePoints, + title, xLabel, yLabel, figureName, lineStyles=['r-', 'b-'], - lineWidths=[1.2, 1.2]) + lineWidths=[1.2, 1.2], + calendar=calendar) else: - figname = '{}/sst_{}_{}.png'.format(plots_dir, regions[iregion], - casename) - timeseries_analysis_plot(config, [SST], N_movavg, title, xlabel, - ylabel, figname, lineStyles=['r-'], - lineWidths=[1.2]) + timeseries_analysis_plot(config, [SST], movingAveragePoints, title, + xLabel, yLabel, figureName, + lineStyles=['r-'], lineWidths=[1.2], + calendar=calendar) diff --git a/mpas_analysis/ocean/variable_stream_map.py b/mpas_analysis/ocean/variable_stream_map.py index 1bccabb7a..885ce8d63 100644 --- a/mpas_analysis/ocean/variable_stream_map.py +++ b/mpas_analysis/ocean/variable_stream_map.py @@ -1,5 +1,23 @@ -# mappings of stream names from various MPAS-O versions to those in -# mpas_analysis +''' +Mappings of namelist options, stream names and variable names from various +MPAS-O versions to those used by mpas_analysis + +Authors +------- +Xylar Asay-Davis + +Last Modified +------------- +03/29/2017 +''' + +oceanNamelistMap = { + 'config_am_timeseriesstatsmonthly_enable': + ['config_am_timeseriesstatsmonthly_enable', + 'config_am_timeseriesstats_enable'], + 'config_am_mocstreamfunction_enable': + ['config_am_mocstreamfunction_enable']} + oceanStreamMap = {'timeSeriesStats': ['timeSeriesStatsOutput', 'timeSeriesStatsMonthly', 'timeSeriesStatsMonthlyOutput']} @@ -34,6 +52,16 @@ 'time_avg_avgValueWithinOceanLayerRegion_avgLayerThickness_1', 'timeMonthly_avg_avgValueWithinOceanLayerRegion_avgLayerThickness'] +# MOC +oceanVariableMap['avgNormalVelocity'] = \ + ['time_avg_normalVelocity', + 'time_avg_normalVelocity_1', + 'timeMonthly_avg_normalVelocity'] +oceanVariableMap['avgVertVelocityTop'] = \ + ['time_avg_vertVelocityTop', + 'time_avg_vertVelocityTop_1', + 'timeMonthly_avg_vertVelocityTop'] + # model vs. obs. oceanVariableMap['mld'] = \ ['time_avg_dThreshMLD', diff --git a/mpas_analysis/sea_ice/modelvsobs.py b/mpas_analysis/sea_ice/modelvsobs.py index f08740f74..15b2af251 100644 --- a/mpas_analysis/sea_ice/modelvsobs.py +++ b/mpas_analysis/sea_ice/modelvsobs.py @@ -1,21 +1,39 @@ +""" +General comparison of 2-d model fields against data. 
Currently only supports +sea ice concentration (sic) and sea ice thickness (sit) + +Authors +------- +Xylar Asay-Davis, Milena Veneziani + +Last Modified +------------- +04/08/2017 +""" + import os import os.path -import subprocess -import matplotlib.pyplot as plt -import matplotlib.colors as cols -import numpy as np import numpy.ma as ma -import xarray as xr -import datetime +import numpy as np + +import netCDF4 + +from ..shared.constants import constants + +from ..shared.interpolation import interpolate -from netCDF4 import Dataset as netcdf_dataset +from ..shared.climatology import climatology -from ..shared.mpas_xarray.mpas_xarray import preprocess_mpas, \ - remove_repeated_time_index -from ..shared.plot.plotting import plot_polar_comparison +from ..shared.plot.plotting import plot_polar_comparison, \ + setup_colormap -from ..shared.io import StreamsFile +from ..shared.io.utility import build_config_full_path + +from ..shared.generalized_reader.generalized_reader \ + import open_multifile_dataset + +from .utility import setup_sea_ice_task def seaice_modelvsobs(config, streamMap=None, variableMap=None): @@ -23,356 +41,454 @@ def seaice_modelvsobs(config, streamMap=None, variableMap=None): Performs analysis of sea-ice properties by comparing with previous model results and/or observations. - config is an instance of MpasAnalysisConfigParser containing configuration - options. + config : instance of MpasAnalysisConfigParser + Contains configuration options - If present, streamMap is a dictionary of MPAS-O stream names that map to - their mpas_analysis counterparts. + field : {'sst', 'sss', 'mld'} + The name of a field to be analyized - If present, variableMap is a dictionary of MPAS-O variable names that map - to their mpas_analysis counterparts. + streamMap : dict, optional + A dictionary of MPAS-O stream names that map to their mpas_analysis + counterparts. - Author: Xylar Asay-Davis, Milena Veneziani - Last Modified: 12/07/2016 - """ + variableMap : dict, optional + A dictionary of MPAS-O variable names that map to their mpas_analysis + counterparts. 
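The `streamMap`/`variableMap` arguments used throughout the analysis tasks have the dict-of-lists shape defined in `variable_stream_map.py` above: each mpas_analysis-side name maps to the list of MPAS names it may appear under in a given model version. For reference, a fragment copied from the ocean maps in this patch (the sea-ice maps are assumed to follow the same shape):

```
streamMap = {'timeSeriesStats': ['timeSeriesStatsOutput',
                                 'timeSeriesStatsMonthly',
                                 'timeSeriesStatsMonthlyOutput']}

variableMap = {'avgVertVelocityTop': ['time_avg_vertVelocityTop',
                                      'time_avg_vertVelocityTop_1',
                                      'timeMonthly_avg_vertVelocityTop']}
```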
- # read parameters from config file - indir = config.get('paths', 'archive_dir_ocn') + Authors + ------- + Xylar Asay-Davis, Milena Veneziani - streams_filename = config.get('input', 'seaice_streams_filename') - streams = StreamsFile(streams_filename, streamsdir=indir) + Last Modified + ------------- + 04/08/2017 + """ + + # perform common setup for the task + namelist, runStreams, historyStreams, calendar, namelistMap, \ + streamMap, variableMap, plotsDirectory, simulationStartTime, \ + restartFileName = setup_sea_ice_task(config) # get a list of timeSeriesStatsMonthly output files from the streams file, # reading only those that are between the start and end dates - startDate = config.get('time', 'climo_start_date') - endDate = config.get('time', 'climo_end_date') - streamName = streams.find_stream(streamMap['timeSeriesStats']) - infiles = streams.readpath(streamName, startDate=startDate, - endDate=endDate) - print 'Reading files {} through {}'.format(infiles[0], infiles[-1]) - - plots_dir = config.get('paths', 'plots_dir') - obsdir = config.get('paths', 'obs_seaicedir') - - casename = config.get('case', 'casename') - - remapfile = config.get('data', 'mpas_remapfile') - climodir = config.get('data', 'mpas_climodir') - - climo_yr1 = config.getint('time', 'climo_yr1') - climo_yr2 = config.getint('time', 'climo_yr2') - yr_offset = config.getint('time', 'yr_offset') - - # climodir = "{}/{}".format(climodir, casename) - climodir_regridded = "{}/mpas_regridded".format(climodir) - if not os.path.isdir(climodir): - print "\nClimatology directory does not exist. Create it...\n" - os.mkdir(climodir) - if not os.path.isdir(climodir_regridded): - print "\nRegridded directory does not exist. Create it...\n" - os.mkdir(climodir_regridded) - - print indir - print climodir - - # Model climo (output) filenames - climofiles = {} - climofiles['winNH'] = "mpas-cice_climo.years{:04d}-{:04d}.jfm.nc".format( - climo_yr1, climo_yr2) - climofiles['sumNH'] = "mpas-cice_climo.years{:04d}-{:04d}.jas.nc".format( - climo_yr1, climo_yr2) - climofiles['winSH'] = "mpas-cice_climo.years{:04d}-{:04d}.djf.nc".format( - climo_yr1, climo_yr2) - climofiles['sumSH'] = "mpas-cice_climo.years{:04d}-{:04d}.jja.nc".format( - climo_yr1, climo_yr2) - climofiles['on'] = "mpas-cice_climo.years{:04d}-{:04d}.on.nc".format( - climo_yr1, climo_yr2) - climofiles['fm'] = "mpas-cice_climo.years{:04d}-{:04d}.fm.nc".format( - climo_yr1, climo_yr2) - - # make a dictionary of the months in each climotology - monthsInClim = {} - monthsInClim['winNH'] = [1, 2, 3] - monthsInClim['sumNH'] = [7, 8, 9] - monthsInClim['winSH'] = [12, 1, 2] - monthsInClim['sumSH'] = [6, 7, 8] - monthsInClim['on'] = [10, 11] - monthsInClim['fm'] = [2, 3] - - # Obs filenames - obs_iceconc_filenames = {} - obs_iceconc_filenames['winNH_NASATeam'] = \ - "{}/SSMI/NASATeam_NSIDC0051/SSMI_NASATeam_gridded_concentration_NH_" \ - "jfm.interp0.5x0.5.nc".format(obsdir) - obs_iceconc_filenames['sumNH_NASATeam'] = \ - "{}/SSMI/NASATeam_NSIDC0051/SSMI_NASATeam_gridded_concentration_NH_" \ - "jas.interp0.5x0.5.nc".format(obsdir) - obs_iceconc_filenames['winSH_NASATeam'] = \ - "{}/SSMI/NASATeam_NSIDC0051/SSMI_NASATeam_gridded_concentration_SH_" \ - "djf.interp0.5x0.5.nc".format(obsdir) - obs_iceconc_filenames['sumSH_NASATeam'] = \ - "{}/SSMI/NASATeam_NSIDC0051/SSMI_NASATeam_gridded_concentration_SH_" \ - "jja.interp0.5x0.5.nc".format(obsdir) - obs_iceconc_filenames['winNH_Bootstrap'] = \ - "{}/SSMI/Bootstrap_NSIDC0079/SSMI_Bootstrap_gridded_concentration_" \ - 
"NH_jfm.interp0.5x0.5.nc".format(obsdir) - obs_iceconc_filenames['sumNH_Bootstrap'] = \ - "{}/SSMI/Bootstrap_NSIDC0079/SSMI_Bootstrap_gridded_concentration_" \ - "NH_jas.interp0.5x0.5.nc".format(obsdir) - obs_iceconc_filenames['winSH_Bootstrap'] = \ - "{}/SSMI/Bootstrap_NSIDC0079/SSMI_Bootstrap_gridded_concentration_" \ - "SH_djf.interp0.5x0.5.nc".format(obsdir) - obs_iceconc_filenames['sumSH_Bootstrap'] = \ - "{}/SSMI/Bootstrap_NSIDC0079/SSMI_Bootstrap_gridded_concentration_" \ - "SH_jja.interp0.5x0.5.nc".format(obsdir) - obs_icethick_filenames = {} - obs_icethick_filenames['onNH'] = "{}/ICESat/ICESat_gridded_mean_" \ - "thickness_NH_on.interp0.5x0.5.nc".format(obsdir) - obs_icethick_filenames['fmNH'] = "{}/ICESat/ICESat_gridded_mean_" \ - "thickness_NH_fm.interp0.5x0.5.nc".format(obsdir) - obs_icethick_filenames['onSH'] = "{}/ICESat/ICESat_gridded_mean_" \ - "thickness_SH_on.interp0.5x0.5.nc".format(obsdir) - obs_icethick_filenames['fmSH'] = "{}/ICESat/ICESat_gridded_mean_" \ - "thickness_SH_fm.interp0.5x0.5.nc".format(obsdir) - - # Checks on directory/files existence: - for climName in obs_iceconc_filenames: - obs_filename = obs_iceconc_filenames[climName] - if not os.path.isfile(obs_filename): - raise SystemExit("Obs file {} not found. Exiting...".format( - obs_filename)) - for climName in obs_icethick_filenames: - obs_filename = obs_icethick_filenames[climName] - if not os.path.isfile(obs_filename): - raise SystemExit("Obs file {} not found. Exiting...".format( - obs_filename)) - + startDate = config.get('climatology', 'startDate') + endDate = config.get('climatology', 'endDate') + streamName = historyStreams.find_stream(streamMap['timeSeriesStats']) + fileNames = historyStreams.readpath(streamName, startDate=startDate, + endDate=endDate, calendar=calendar) + print '\n Reading files:\n' \ + ' {} through\n {}'.format( + os.path.basename(fileNames[0]), + os.path.basename(fileNames[-1])) # Load data print " Load sea-ice data..." - ds = xr.open_mfdataset( - infiles, - preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset, - timestr='Time', - onlyvars=['iceAreaCell', - 'iceVolumeCell'], - varmap=variableMap)) - ds = remove_repeated_time_index(ds) + ds = open_multifile_dataset(fileNames=fileNames, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=['iceAreaCell', + 'iceVolumeCell'], + variableMap=variableMap, + startDate=startDate, + endDate=endDate) # Compute climatologies (first motnhly and then seasonally) print " Compute seasonal climatologies..." - time_start = datetime.datetime(yr_offset+climo_yr1, 1, 1) - time_end = datetime.datetime(yr_offset+climo_yr2, 12, 31) - ds_tslice = ds.sel(Time=slice(time_start, time_end)) - # check that each year has 24 months (?) - monthly_clim = ds_tslice.groupby('Time.month').mean('Time') - daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - monthLetters = ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'] - - clims = {} - for climName in monthsInClim: - months = monthsInClim[climName] - month = months[0] - days = daysInMonth[month-1] - climatology = days*monthly_clim.sel(month=month) - totalDays = days - for month in months[1:]: - days = daysInMonth[month-1] - climatology += days*monthly_clim.sel(month=month) - totalDays += days - climatology /= totalDays - - clims[climName] = climatology - - print " Regrid fields to regular grid..." 
- for climName in clims: - # Save to netcdf files - outFileName = "{}/{}".format(climodir, climofiles[climName]) - clims[climName].to_netcdf(outFileName) - args = ["ncremap", "-P", "mpas", "-i", outFileName, "-m", remapfile, - "-O", climodir_regridded] - try: - subprocess.check_call(args) - except subprocess.CalledProcessError, e: - print 'Error with call ', ' '.join(args) - print e - raise e + + changed, startYear, endYear = \ + climatology.update_start_end_year(ds, config, calendar) + + mpasMappingFileName = climatology.write_mpas_mapping_file( + config=config, meshFileName=restartFileName) + + _compute_and_plot_concentration(config, ds, mpasMappingFileName, calendar) + + _compute_and_plot_thickness(config, ds, mpasMappingFileName, calendar) + + +def _compute_and_plot_concentration(config, ds, mpasMappingFileName, calendar): + """ + Given a config file, monthly climatology on the mpas grid, and the data + necessary to perform horizontal interpolation to a comparison grid, + computes seasonal climatologies and plots model results, observations + and biases in sea-ice concentration. + + Parameters + ---------- + config : an instance of MpasConfigParser + + ds : ``xarray.Dataset`` object + an xarray data set from which to compute climatologies + + mpasMappingFileName : The name of a mapping file used to perform + interpolation of MPAS model results + + calendar: ``{'gregorian', 'gregorian_noleap'}`` + The name of one of the calendars supported by MPAS cores + + Authors + ------- + Xylar Asay-Davis, Milena Veneziani + + Last Modified + ------------- + 04/08/2017 + """ print " Make ice concentration plots..." - suptitle = "Ice concentration" - # interate over observations of sea-ice concentration - first = True - for climName in ['winNH', 'winSH', 'sumNH', 'sumSH']: - hemisphere = climName[-2:] - season = climName[:-2] + plotsDirectory = build_config_full_path(config, 'output', + 'plotsSubdirectory') + mainRunName = config.get('runs', 'mainRunName') + startYear = config.getint('climatology', 'startYear') + endYear = config.getint('climatology', 'endYear') + overwriteMpasClimatology = config.getWithDefault( + 'climatology', 'overwriteMpasClimatology', False) + + overwriteObsClimatology = config.getWithDefault( + 'seaIceObservations', 'overwriteObsClimatology', False) + + subtitle = "Ice concentration" + + hemisphereSeasons = {'JFM': ('NH', 'Winter'), + 'JAS': ('NH', 'Summer'), + 'DJF': ('SH', 'Winter'), + 'JJA': ('SH', 'Summer')} + + obsFileNames = {} + regriddedObsFileNames = {} + + buildObsClimatologies = overwriteObsClimatology + for months in hemisphereSeasons: + hemisphere, season = hemisphereSeasons[months] + climFieldName = 'iceConcentration' + for obsName in ['NASATeam', 'Bootstrap']: + key = (months, obsName) + obsFileName = build_config_full_path( + config, 'seaIceObservations', + 'concentration{}{}_{}'.format(obsName, hemisphere, months)) + obsFieldName = '{}_{}_{}'.format(climFieldName, hemisphere, + obsName) + + if not os.path.isfile(obsFileName): + raise OSError("Obs file {} not found.".format( + obsFileName)) + + (climatologyFileName, regriddedFileName) = \ + climatology.get_observation_climatology_file_names( + config=config, fieldName=obsFieldName, monthNames=months, + componentName='seaIce', gridFileName=obsFileName, + latVarName='t_lat', lonVarName='t_lon') + + obsFileNames[key] = obsFileName + regriddedObsFileNames[key] = regriddedFileName + + if not os.path.exists(regriddedFileName): + buildObsClimatologies = True + + for months in hemisphereSeasons: + hemisphere, season = 
hemisphereSeasons[months] + monthValues = constants.monthDictionary[months] + field = 'iceAreaCell' + climFieldName = 'iceConcentration' + + # interpolate the model results + (climatologyFileName, climatologyPrefix, regriddedFileName) = \ + climatology.get_mpas_climatology_file_names( + config=config, fieldName=climFieldName, + monthNames=months) + + if overwriteMpasClimatology or not os.path.exists(climatologyFileName): + seasonalClimatology = climatology.cache_climatologies( + ds, monthValues, config, climatologyPrefix, calendar, + printProgress=True) + # write out the climatology so we can interpolate it with + # interpolate.remap + seasonalClimatology.to_netcdf(climatologyFileName) + + interpolate.remap(inFileName=climatologyFileName, + outFileName=regriddedFileName, + inWeightFileName=mpasMappingFileName, + sourceFileType='mpas', + overwrite=overwriteMpasClimatology) + + ncFile = netCDF4.Dataset(regriddedFileName, mode='r') + iceConcentration = ncFile.variables[field][:] + lons = ncFile.variables["lon"][:] + lats = ncFile.variables["lat"][:] + ncFile.close() + lonTarg, latTarg = np.meshgrid(lons, lats) if hemisphere == 'NH': plotProjection = 'npstere' else: plotProjection = 'spstere' - clevsModelObs = config.getExpression('seaice_modelvsobs', - 'clevsModelObs_conc_{}'.format( - season)) - cmap = plt.get_cmap(config.get('seaice_modelvsobs', 'cmapModelObs')) - cmapIndices = config.getExpression('seaice_modelvsobs', - 'cmapIndicesModelObs') - cmapModelObs = cols.ListedColormap(cmap(cmapIndices), "cmapModelObs") - - clevsDiff = config.getExpression('seaice_modelvsobs', - 'clevsDiff_conc_{}'.format(season)) - cmap = plt.get_cmap(config.get('seaice_modelvsobs', 'cmapDiff')) - cmapIndices = config.getExpression('seaice_modelvsobs', - 'cmapIndicesDiff') - cmapDiff = cols.ListedColormap(cmap(cmapIndices), "cmapDiff") - - lon0 = config.getfloat('seaice_modelvsobs', - 'lon0_{}'.format(hemisphere)) - latmin = config.getfloat('seaice_modelvsobs', - 'latmin_{}'.format(hemisphere)) - - # Load in sea-ice data - # Model... 
- # ice concentrations - fileName = "{}/{}".format(climodir_regridded, climofiles[climName]) - f = netcdf_dataset(fileName, mode='r') - iceconc = f.variables["iceAreaCell"][:] - if(first): - lons = f.variables["lon"][:] - lats = f.variables["lat"][:] - print "Min lon: ", np.amin(lons), "Max lon: ", np.amax(lons) - print "Min lat: ", np.amin(lats), "Max lat: ", np.amax(lats) - Lons, Lats = np.meshgrid(lons, lats) - first = False - f.close() - - # ...and observations + (colormapResult, colorbarLevelsResult) = setup_colormap( + config, + 'regriddedSeaIceConcThick', + suffix='ConcResult{}'.format(season)) + (colormapDifference, colorbarLevelsDifference) = setup_colormap( + config, + 'regriddedSeaIceConcThick', + suffix='ConcDifference{}'.format(season)) + + referenceLongitude = config.getfloat( + 'regriddedSeaIceConcThick', + 'referenceLongitude{}'.format(hemisphere)) + minimumLatitude = config.getfloat( + 'regriddedSeaIceConcThick', + 'minimumLatitude{}'.format(hemisphere)) + # ice concentrations from NASATeam (or Bootstrap) algorithm for obsName in ['NASATeam', 'Bootstrap']: - fileName = obs_iceconc_filenames['{}_{}'.format(climName, obsName)] - f = netcdf_dataset(fileName, mode='r') - obs_iceconc = f.variables["AICE"][:] - f.close() - - diff = iceconc - obs_iceconc - - monthsName = [] - for month in monthsInClim[climName]: - monthsName.append(monthLetters[month-1]) - monthsName = ''.join(monthsName) + key = (months, obsName) + regriddedFileName = regriddedObsFileNames[key] + + if buildObsClimatologies: + obsFileName = obsFileNames[key] + obsMappingFileName = \ + climatology.write_observations_mapping_file( + config=config, componentName='seaIce', + fieldName='seaIce', gridFileName=obsFileName, + latVarName='t_lat', lonVarName='t_lon') + + if obsMappingFileName is None: + regriddedFileName = obsFileName + else: + interpolate.remap(inFileName=obsFileName, + outFileName=regriddedFileName, + inWeightFileName=obsMappingFileName, + sourceFileType='latlon', + sourceLatVarName='t_lat', + sourceLonVarName='t_lon', + overwrite=overwriteObsClimatology) + + # read in the results from the remapped files + ncFile = netCDF4.Dataset(regriddedFileName, mode='r') + obsIceConcentration = ncFile.variables["AICE"][:] + ncFile.close() + + difference = iceConcentration - obsIceConcentration title = "{} ({}, years {:04d}-{:04d})".format( - suptitle, monthsName, climo_yr1, climo_yr2) + subtitle, months, startYear, endYear) fileout = "{}/iceconc{}{}_{}_{}_years{:04d}-{:04d}.png".format( - plots_dir, obsName, hemisphere, casename, monthsName, - climo_yr1, climo_yr2) + plotsDirectory, obsName, hemisphere, mainRunName, + months, startYear, endYear) plot_polar_comparison( config, - Lons, - Lats, - iceconc, - obs_iceconc, - diff, - cmapModelObs, - clevsModelObs, - cmapDiff, - clevsDiff, + lonTarg, + latTarg, + iceConcentration, + obsIceConcentration, + difference, + colormapResult, + colorbarLevelsResult, + colormapDifference, + colorbarLevelsDifference, title=title, fileout=fileout, plotProjection=plotProjection, - latmin=latmin, - lon0=lon0, - modelTitle=casename, + latmin=minimumLatitude, + lon0=referenceLongitude, + modelTitle=mainRunName, obsTitle="Observations (SSM/I {})".format(obsName), diffTitle="Model-Observations", cbarlabel="fraction") + +def _compute_and_plot_thickness(config, ds, mpasMappingFileName, calendar): + """ + Given a config file, monthly climatology on the mpas grid, and the data + necessary to perform horizontal interpolation to a comparison grid, + computes seasonal climatologies and plots model 
results, observations + and biases in sea-ice thickness. + + Parameters + ---------- + config : an instance of MpasConfigParser + + ds : ``xarray.Dataset`` object + an xarray data set from which to compute climatologies + + mpasMappingFileName : The name of a mapping file used to perform + interpolation of MPAS model results + + calendar: ``{'gregorian', 'gregorian_noleap'}`` + The name of one of the calendars supported by MPAS cores + + Authors + ------- + Xylar Asay-Davis, Milena Veneziani + + Last Modified + ------------- + 04/08/2017 + """ + print " Make ice thickness plots..." - # Plot Northern Hemisphere FM sea-ice thickness - suptitle = "Ice thickness" - # interate over observations of sea-ice thickness - for climName in ['fm', 'on']: - - # Load in sea-ice data - # Model... - # ice concentrations - fileName = "{}/{}".format(climodir_regridded, climofiles[climName]) - f = netcdf_dataset(fileName, mode='r') - icethick = f.variables["iceVolumeCell"][:] - f.close() - - monthsName = [] - for month in monthsInClim[climName]: - monthsName.append(monthLetters[month-1]) - monthsName = ''.join(monthsName) + + subtitle = "Ice thickness" + + plotsDirectory = build_config_full_path(config, 'output', + 'plotsSubdirectory') + mainRunName = config.get('runs', 'mainRunName') + startYear = config.getint('climatology', 'startYear') + endYear = config.getint('climatology', 'endYear') + overwriteMpasClimatology = config.getWithDefault( + 'climatology', 'overwriteMpasClimatology', False) + + overwriteObsClimatology = config.getWithDefault( + 'seaIceObservations', 'overwriteObsClimatology', False) + + obsFileNames = {} + regriddedObsFileNames = {} + + # build a list of regridded observations files + buildObsClimatologies = overwriteObsClimatology + for months in ['FM', 'ON']: + climFieldName = 'iceThickness' + for hemisphere in ['NH', 'SH']: + key = (months, hemisphere) + obsFileName = build_config_full_path( + config, 'seaIceObservations', + 'thickness{}_{}'.format(hemisphere, months)) + if not os.path.isfile(obsFileName): + raise OSError("Obs file {} not found.".format( + obsFileName)) + + obsFieldName = '{}_{}'.format(climFieldName, hemisphere) + (climatologyFileName, regriddedFileName) = \ + climatology.get_observation_climatology_file_names( + config=config, fieldName=obsFieldName, monthNames=months, + componentName='seaIce', gridFileName=obsFileName, + latVarName='t_lat', lonVarName='t_lon') + + obsFileNames[key] = obsFileName + regriddedObsFileNames[key] = regriddedFileName + + if not os.path.exists(regriddedFileName): + buildObsClimatologies = True + + for months in ['FM', 'ON']: + monthValues = constants.monthDictionary[months] + field = 'iceVolumeCell' + climFieldName = 'iceThickness' + + # interpolate the model results + (climatologyFileName, climatologyPrefix, regriddedFileName) = \ + climatology.get_mpas_climatology_file_names( + config=config, fieldName=climFieldName, + monthNames=months) + + if overwriteMpasClimatology or not os.path.exists(climatologyFileName): + seasonalClimatology = climatology.cache_climatologies( + ds, monthValues, config, climatologyPrefix, calendar, + printProgress=True) + # write out the climatology so we can interpolate it with + # interpolate.remap. 
Set _FillValue so ncremap doesn't produce + # an error + seasonalClimatology.to_netcdf(climatologyFileName) + + interpolate.remap(inFileName=climatologyFileName, + outFileName=regriddedFileName, + inWeightFileName=mpasMappingFileName, + sourceFileType='mpas', + overwrite=overwriteMpasClimatology) + + ncFile = netCDF4.Dataset(regriddedFileName, mode='r') + iceThickness = ncFile.variables[field][:] + lons = ncFile.variables["lon"][:] + lats = ncFile.variables["lat"][:] + ncFile.close() + lonTarg, latTarg = np.meshgrid(lons, lats) for hemisphere in ['NH', 'SH']: - # ...and observations - # ice concentrations from NASATeam (or Bootstrap) algorithm - - clevsModelObs = config.getExpression( - 'seaice_modelvsobs', - 'clevsModelObs_thick_{}'.format(hemisphere)) - cmap = plt.get_cmap(config.get('seaice_modelvsobs', - 'cmapModelObs')) - cmapIndices = config.getExpression('seaice_modelvsobs', - 'cmapIndicesModelObs') - cmapModelObs = cols.ListedColormap(cmap(cmapIndices), - "cmapModelObs") - - clevsDiff = config.getExpression( - 'seaice_modelvsobs', - 'clevsDiff_thick_{}'.format(hemisphere)) - cmap = plt.get_cmap(config.get('seaice_modelvsobs', 'cmapDiff')) - cmapIndices = config.getExpression('seaice_modelvsobs', - 'cmapIndicesDiff') - cmapDiff = cols.ListedColormap(cmap(cmapIndices), "cmapDiff") - - lon0 = config.getfloat('seaice_modelvsobs', - 'lon0_{}'.format(hemisphere)) - latmin = config.getfloat('seaice_modelvsobs', - 'latmin_{}'.format(hemisphere)) - - fileName = obs_icethick_filenames['{}{}'.format(climName, - hemisphere)] - f = netcdf_dataset(fileName, mode='r') - obs_icethick = f.variables["HI"][:] - f.close() + + (colormapResult, colorbarLevelsResult) = setup_colormap( + config, + 'regriddedSeaIceConcThick', + suffix='ThickResult{}'.format(hemisphere)) + (colormapDifference, colorbarLevelsDifference) = setup_colormap( + config, + 'regriddedSeaIceConcThick', + suffix='ThickDifference{}'.format(hemisphere)) + + referenceLongitude = config.getfloat( + 'regriddedSeaIceConcThick', + 'referenceLongitude{}'.format(hemisphere)) + minimumLatitude = config.getfloat( + 'regriddedSeaIceConcThick', + 'minimumLatitude{}'.format(hemisphere)) + + # now the observations + key = (months, hemisphere) + regriddedFileName = regriddedObsFileNames[key] + + if buildObsClimatologies: + obsFileName = obsFileNames[key] + obsMappingFileName = \ + climatology.write_observations_mapping_file( + config=config, componentName='seaIce', + fieldName='seaIce', gridFileName=obsFileName, + latVarName='t_lat', lonVarName='t_lon') + + if obsMappingFileName is None: + regriddedFileName = obsFileName + else: + interpolate.remap(inFileName=obsFileName, + outFileName=regriddedFileName, + inWeightFileName=obsMappingFileName, + sourceFileType='latlon', + sourceLatVarName='t_lat', + sourceLonVarName='t_lon', + overwrite=overwriteObsClimatology) + + # read in the results from the remapped files + ncFile = netCDF4.Dataset(regriddedFileName, mode='r') + obsIceThickness = ncFile.variables["HI"][:] + ncFile.close() + # Mask thickness fields - icethick[icethick == 0] = ma.masked - obs_icethick = ma.masked_values(obs_icethick, 0) + iceThickness = ma.masked_values(iceThickness, 0) + obsIceThickness = ma.masked_values(obsIceThickness, 0) if hemisphere == 'NH': # Obs thickness should be nan above 86 (ICESat data) - obs_icethick[Lats > 86] = ma.masked + obsIceThickness[latTarg > 86] = ma.masked plotProjection = 'npstere' else: plotProjection = 'spstere' - diff = icethick - obs_icethick + difference = iceThickness - obsIceThickness - title = "{} 
({}, years {:04d}-{:04d})".format(suptitle, monthsName, - climo_yr1, climo_yr2) + title = "{} ({}, years {:04d}-{:04d})".format(subtitle, months, + startYear, endYear) fileout = "{}/icethick{}_{}_{}_years{:04d}-{:04d}.png".format( - plots_dir, hemisphere, casename, monthsName, climo_yr1, - climo_yr2) + plotsDirectory, hemisphere, mainRunName, months, startYear, + endYear) plot_polar_comparison( config, - Lons, - Lats, - icethick, - obs_icethick, - diff, - cmapModelObs, - clevsModelObs, - cmapDiff, - clevsDiff, + lonTarg, + latTarg, + iceThickness, + obsIceThickness, + difference, + colormapResult, + colorbarLevelsResult, + colormapDifference, + colorbarLevelsDifference, title=title, fileout=fileout, plotProjection=plotProjection, - latmin=latmin, - lon0=lon0, - modelTitle=casename, + latmin=minimumLatitude, + lon0=referenceLongitude, + modelTitle=mainRunName, obsTitle="Observations (ICESat)", diffTitle="Model-Observations", cbarlabel="m") + + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/sea_ice/timeseries.py b/mpas_analysis/sea_ice/timeseries.py index d5116c327..a292cd2a9 100644 --- a/mpas_analysis/sea_ice/timeseries.py +++ b/mpas_analysis/sea_ice/timeseries.py @@ -1,16 +1,22 @@ -import numpy as np import xarray as xr -import pandas as pd -import datetime +import os -from ..shared.mpas_xarray.mpas_xarray import preprocess_mpas, \ - remove_repeated_time_index +from ..shared.plot.plotting import timeseries_analysis_plot, \ + timeseries_analysis_plot_polar -from ..shared.plot.plotting import timeseries_analysis_plot +from ..shared.io.utility import build_config_full_path, make_directories -from ..shared.io import StreamsFile +from ..shared.timekeeping.utility import date_to_days, days_to_datetime, \ + datetime_to_days +from ..shared.timekeeping.MpasRelativeDelta import MpasRelativeDelta -from ..shared.timekeeping.Date import Date +from ..shared.generalized_reader.generalized_reader \ + import open_multifile_dataset +from ..shared.mpas_xarray.mpas_xarray import subset_variables + +from .utility import setup_sea_ice_task + +from ..shared.time_series import time_series def seaice_timeseries(config, streamMap=None, variableMap=None): @@ -27,302 +33,383 @@ def seaice_timeseries(config, streamMap=None, variableMap=None): to their mpas_analysis counterparts. 
Author: Xylar Asay-Davis, Milena Veneziani - Last Modified: 12/07/2016 + Last Modified: 04/08/2017 """ + def compute_area_vol_part(timeIndices, firstCall): + dsLocal = ds.isel(Time=timeIndices) - # read parameters from config file - indir = config.get('paths', 'archive_dir_ocn') - - streams_filename = config.get('input', 'seaice_streams_filename') - streams = StreamsFile(streams_filename, streamsdir=indir) + if hemisphere == 'NH': + mask = dsMesh.latCell > 0 + else: + mask = dsMesh.latCell < 0 + dsLocal = dsLocal.where(mask) + + dsAreaSum = (dsLocal*dsMesh.areaCell).sum('nCells') + dsAreaSum = dsAreaSum.rename({'iceAreaCell': 'iceArea', + 'iceVolumeCell': 'iceVolume'}) + dsAreaSum['iceThickness'] = (dsAreaSum.iceVolume / + dsMesh.areaCell.sum('nCells')) + + dsAreaSum['iceArea'].attrs['units'] = 'm$^2$' + dsAreaSum['iceArea'].attrs['description'] = \ + 'Total {} sea ice area'.format(hemisphere) + dsAreaSum['iceVolume'].attrs['units'] = 'm$^3$' + dsAreaSum['iceVolume'].attrs['description'] = \ + 'Total {} sea ice volume'.format(hemisphere) + dsAreaSum['iceThickness'].attrs['units'] = 'm' + dsAreaSum['iceThickness'].attrs['description'] = \ + 'Mean {} sea ice volume'.format(hemisphere) + + return dsAreaSum + + # perform common setup for the task + namelist, runStreams, historyStreams, calendar, namelistMap, \ + streamMap, variableMap, plotsDirectory, simulationStartTime, \ + restartFileName = setup_sea_ice_task(config) # get a list of timeSeriesStatsMonthly output files from the streams file, # reading only those that are between the start and end dates - startDate = config.get('time', 'timeseries_start_date') - endDate = config.get('time', 'timeseries_end_date') - streamName = streams.find_stream(streamMap['timeSeriesStats']) - infiles = streams.readpath(streamName, startDate=startDate, - endDate=endDate) - print 'Reading files {} through {}'.format(infiles[0], infiles[-1]) - - varnames = ['iceAreaCell', 'iceVolumeCell'] - - plot_titles = {'iceAreaCell': 'Sea-ice area', - 'iceVolumeCell': 'Sea-ice volume', - 'iceThickness': 'Sea-ice thickness'} - - units_dict = {'iceAreaCell': '[km$^2$]', - 'iceVolumeCell': '[10$^3$ km$^3$]', - 'iceThickness': '[m]'} - - obs_filenames = { - 'iceAreaCell': [config.get('seaIceData', 'obs_iceareaNH'), - config.get('seaIceData', 'obs_iceareaSH')], - 'iceVolumeCell': [config.get('seaIceData', 'obs_icevolNH'), - config.get('seaIceData', 'obs_icevolSH')]} + startDate = config.get('timeSeries', 'startDate') + endDate = config.get('timeSeries', 'endDate') + streamName = historyStreams.find_stream(streamMap['timeSeriesStats']) + fileNames = historyStreams.readpath(streamName, startDate=startDate, + endDate=endDate, calendar=calendar) + print '\n Reading files:\n' \ + ' {} through\n {}'.format( + os.path.basename(fileNames[0]), + os.path.basename(fileNames[-1])) + + plotTitles = {'iceArea': 'Sea-ice area', + 'iceVolume': 'Sea-ice volume', + 'iceThickness': 'Sea-ice mean thickness'} + + units = {'iceArea': '[km$^2$]', + 'iceVolume': '[10$^3$ km$^3$]', + 'iceThickness': '[m]'} + + obsFileNames = { + 'iceArea': {'NH': build_config_full_path(config, 'seaIceObservations', + 'areaNH'), + 'SH': build_config_full_path(config, 'seaIceObservations', + 'areaSH')}, + 'iceVolume': {'NH': build_config_full_path(config, + 'seaIceObservations', + 'volNH'), + 'SH': build_config_full_path(config, + 'seaIceObservations', + 'volSH')}} # Some plotting rules - title_font_size = config.get('seaice_timeseries', 'title_font_size') + titleFontSize = config.get('timeSeriesSeaIceAreaVol', 
'titleFontSize') - indir = config.get('paths', 'archive_dir_ocn') - meshfile = config.get('data', 'mpas_meshfile') + mainRunName = config.get('runs', 'mainRunName') + preprocessedReferenceRunName = config.get('runs', + 'preprocessedReferenceRunName') + preprocessedReferenceDirectory = config.get('seaIcePreprocessedReference', + 'baseDirectory') - casename = config.get('case', 'casename') - ref_casename_v0 = config.get('case', 'ref_casename_v0') - indir_v0data = config.get('paths', 'ref_archive_v0_seaicedir') + compareWithObservations = config.getboolean('timeSeriesSeaIceAreaVol', + 'compareWithObservations') - compare_with_obs = config.getboolean('seaice_timeseries', - 'compare_with_obs') + movingAveragePoints = config.getint('timeSeriesSeaIceAreaVol', + 'movingAveragePoints') - plots_dir = config.get('paths', 'plots_dir') + polarPlot = config.getboolean('timeSeriesSeaIceAreaVol', 'polarPlot') - yr_offset = config.getint('time', 'yr_offset') + outputDirectory = build_config_full_path(config, 'output', + 'timeseriesSubdirectory') - N_movavg = config.getint('seaice_timeseries', 'N_movavg') + make_directories(outputDirectory) print ' Load sea-ice data...' # Load mesh - dsmesh = xr.open_dataset(meshfile) + dsMesh = xr.open_dataset(restartFileName) + dsMesh = subset_variables(dsMesh, + variableList=['lonCell', 'latCell', 'areaCell']) # Load data - ds = xr.open_mfdataset( - infiles, - preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset, - timestr='Time', - onlyvars=['iceAreaCell', - 'iceVolumeCell'], - varmap=variableMap)) - ds = remove_repeated_time_index(ds) - - # convert the start and end dates to datetime objects using - # the Date class, which ensures the results are within the - # supported range - time_start = Date(startDate).to_datetime(yr_offset) - time_end = Date(endDate).to_datetime(yr_offset) - # select only the data in the specified range of years - ds = ds.sel(Time=slice(time_start, time_end)) - - # handle the case where the "mesh" file has a spurious time dimension - if 'Time' in dsmesh.keys(): - dsmesh = dsmesh.drop('Time') - ds = ds.merge(dsmesh) - - year_start = (pd.to_datetime(ds.Time.min().values)).year - year_end = (pd.to_datetime(ds.Time.max().values)).year - time_start = datetime.datetime(year_start, 1, 1) - time_end = datetime.datetime(year_end, 12, 31) - - if ref_casename_v0 != 'None': - infiles_v0data = '{}/icevol.{}.year*.nc'.format(indir_v0data, - ref_casename_v0) - ds_v0 = xr.open_mfdataset( - infiles_v0data, - preprocess=lambda x: preprocess_mpas(x, yearoffset=yr_offset)) - year_end_v0 = (pd.to_datetime(ds_v0.Time.max().values)).year - if year_start <= year_end_v0: - ds_v0_tslice = ds_v0.sel(Time=slice(time_start, time_end)) + ds = open_multifile_dataset(fileNames=fileNames, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=['iceAreaCell', + 'iceVolumeCell'], + variableMap=variableMap, + startDate=startDate, + endDate=endDate) + + yearStart = days_to_datetime(ds.Time.min(), calendar=calendar).year + yearEnd = days_to_datetime(ds.Time.max(), calendar=calendar).year + timeStart = date_to_days(year=yearStart, month=1, day=1, + calendar=calendar) + timeEnd = date_to_days(year=yearEnd, month=12, day=31, + calendar=calendar) + + if preprocessedReferenceRunName != 'None': + inFilesPreprocessed = '{}/icevol.{}.year*.nc'.format( + preprocessedReferenceDirectory, preprocessedReferenceRunName) + dsPreprocessed = open_multifile_dataset( + fileNames=inFilesPreprocessed, + calendar=calendar, + 
config=config, + timeVariableName='xtime') + preprocessedYearEnd = days_to_datetime(dsPreprocessed.Time.max(), + calendar=calendar).year + if yearStart <= preprocessedYearEnd: + dsPreprocessedTimeSlice = dsPreprocessed.sel(Time=slice(timeStart, + timeEnd)) else: - print ' Warning: v0 time series lies outside current bounds of v1 time series. Skipping it.' - ref_casename_v0 = 'None' - - # Make Northern and Southern Hemisphere partition: - areaCell = ds.areaCell - ind_nh = ds.latCell > 0 - ind_sh = ds.latCell < 0 - areaCell_nh = areaCell.where(ind_nh) - areaCell_sh = areaCell.where(ind_sh) - - for varname in varnames: - obs_filenameNH = obs_filenames[varname][0] - obs_filenameSH = obs_filenames[varname][1] - plot_title = plot_titles[varname] - units = units_dict[varname] - - print ' Compute NH and SH time series of {}...'.format(varname) - if varname == 'iceThickCell': - varnamefull = 'iceVolumeCell' - else: - varnamefull = varname - var = ds[varnamefull] - - var_nh = var.where(ind_nh)*areaCell_nh - var_sh = var.where(ind_sh)*areaCell_sh - - ind_iceext = var > 0.15 - var_nh_iceext = var_nh.where(ind_iceext) - var_sh_iceext = var_sh.where(ind_iceext) - - if varname == 'iceAreaCell': - var_nh = var_nh.sum('nCells') - var_sh = var_sh.sum('nCells') - var_nh = 1e-6*var_nh # m^2 to km^2 - var_sh = 1e-6*var_sh # m^2 to km^2 - var_nh_iceext = 1e-6*var_nh_iceext.sum('nCells') - var_sh_iceext = 1e-6*var_sh_iceext.sum('nCells') - elif varname == 'iceVolumeCell': - var_nh = var_nh.sum('nCells') - var_sh = var_sh.sum('nCells') - var_nh = 1e-3*1e-9*var_nh # m^3 to 10^3 km^3 - var_sh = 1e-3*1e-9*var_sh # m^3 to 10^3 km^3 - else: - var_nh = var_nh.mean('nCells')/areaCell_nh.mean('nCells') - var_sh = var_sh.mean('nCells')/areaCell_sh.mean('nCells') - - print ' Make plots...' 
- - xlabel = 'Time [years]' - - if ref_casename_v0 != 'None': - figname_nh = '{}/{}NH_{}_{}.png'.format(plots_dir, varname, - casename, ref_casename_v0) - figname_sh = '{}/{}SH_{}_{}.png'.format(plots_dir, varname, - casename, ref_casename_v0) - else: - figname_nh = '{}/{}NH_{}.png'.format(plots_dir, varname, casename) - figname_sh = '{}/{}SH_{}.png'.format(plots_dir, varname, casename) - - title_nh = '{} (NH), {} (r)'.format(plot_title, casename) - title_sh = '{} (SH), {} (r)'.format(plot_title, casename) - - if compare_with_obs: - if varname == 'iceAreaCell': - title_nh = \ - '{}\nSSM/I observations, annual cycle (k)'.format(title_nh) - title_sh = \ - '{}\nSSM/I observations, annual cycle (k)'.format(title_sh) - elif varname == 'iceVolumeCell': - title_nh = '{}\nPIOMAS, annual cycle (k)'.format(title_nh) - title_sh = '{}\n'.format(title_sh) - - if ref_casename_v0 != 'None': - title_nh = '{}\n {} (b)'.format(title_nh, ref_casename_v0) - title_sh = '{}\n {} (b)'.format(title_sh, ref_casename_v0) - - if varname == 'iceAreaCell': - - if compare_with_obs: - ds_obs = xr.open_mfdataset( - obs_filenameNH, - preprocess=lambda x: preprocess_mpas(x, - yearoffset=yr_offset)) - ds_obs = remove_repeated_time_index(ds_obs) - var_nh_obs = ds_obs.IceArea - var_nh_obs = replicate_cycle(var_nh, var_nh_obs) - - ds_obs = xr.open_mfdataset( - obs_filenameSH, - preprocess=lambda x: preprocess_mpas(x, - yearoffset=yr_offset)) - ds_obs = remove_repeated_time_index(ds_obs) - var_sh_obs = ds_obs.IceArea - var_sh_obs = replicate_cycle(var_sh, var_sh_obs) - - if ref_casename_v0 != 'None': - infiles_v0data = '{}/icearea.{}.year*.nc'.format( - indir_v0data, ref_casename_v0) - ds_v0 = xr.open_mfdataset( - infiles_v0data, - preprocess=lambda x: preprocess_mpas(x, - yearoffset=yr_offset)) - ds_v0_tslice = ds_v0.sel(Time=slice(time_start, time_end)) - var_nh_v0 = ds_v0_tslice.icearea_nh - var_sh_v0 = ds_v0_tslice.icearea_sh - - elif varname == 'iceVolumeCell': - - if compare_with_obs: - ds_obs = xr.open_mfdataset( - obs_filenameNH, - preprocess=lambda x: preprocess_mpas(x, - yearoffset=yr_offset)) - ds_obs = remove_repeated_time_index(ds_obs) - var_nh_obs = ds_obs.IceVol - var_nh_obs = replicate_cycle(var_nh, var_nh_obs) - - var_sh_obs = None - - if ref_casename_v0 != 'None': - infiles_v0data = '{}/icevol.{}.year*.nc'.format( - indir_v0data, ref_casename_v0) - ds_v0 = xr.open_mfdataset( - infiles_v0data, - preprocess=lambda x: preprocess_mpas(x, - yearoffset=yr_offset)) - ds_v0_tslice = ds_v0.sel(Time=slice(time_start, time_end)) - var_nh_v0 = ds_v0_tslice.icevolume_nh - var_sh_v0 = ds_v0_tslice.icevolume_sh - - if varname in ['iceAreaCell', 'iceVolumeCell']: - if compare_with_obs: - if ref_casename_v0 != 'None': - vars_nh = [var_nh, var_nh_obs, var_nh_v0] - vars_sh = [var_sh, var_sh_obs, var_sh_v0] + print ' Warning: Preprocessed time series ends before the ' \ + 'timeSeries startYear and will not be plotted.' 
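+            # setting the run name to 'None' makes the plotting code below
+            # skip the preprocessed reference time series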
+ preprocessedReferenceRunName = 'None' + + norm = {'iceArea': 1e-6, # m^2 to km^2 + 'iceVolume': 1e-12, # m^3 to 10^3 km^3 + 'iceThickness': 1.} + + xLabel = 'Time [years]' + + dsTimeSeries = {} + obs = {} + preprocessed = {} + figureNameStd = {} + figureNamePolar = {} + title = {} + plotVars = {} + + for hemisphere in ['NH', 'SH']: + print ' Caching {} data'.format(hemisphere) + cacheFileName = '{}/seaIceAreaVolumeTimeSeries_{}.nc'.format( + outputDirectory, hemisphere) + + dsTimeSeries[hemisphere] = time_series.cache_time_series( + ds.Time.values, compute_area_vol_part, cacheFileName, calendar, + yearsPerCacheUpdate=10, printProgress=True) + + print ' Make {} plots...'.format(hemisphere) + + for variableName in ['iceArea', 'iceVolume']: + key = (hemisphere, variableName) + + # apply the norm to each variable + plotVars[key] = (norm[variableName] * + dsTimeSeries[hemisphere][variableName]) + + prefix = '{}/{}{}_{}'.format(plotsDirectory, + variableName, + hemisphere, + mainRunName) + + figureNameStd[key] = '{}.png'.format(prefix) + figureNamePolar[key] = '{}_polar.png'.format(prefix) + + title[key] = '{} ({}), {} (r)'.format( + plotTitles[variableName], hemisphere, mainRunName) + + if compareWithObservations: + key = (hemisphere, 'iceArea') + title[key] = '{}\nSSM/I observations, annual cycle (k)'.format( + title[key]) + if hemisphere == 'NH': + key = (hemisphere, 'iceVolume') + title[key] = '{}\nPIOMAS, annual cycle (k)'.format(title[key]) + + if preprocessedReferenceRunName != 'None': + for variableName in ['iceArea', 'iceVolume']: + key = (hemisphere, variableName) + title[key] = '{}\n {} (b)'.format( + title[key], preprocessedReferenceRunName) + + if compareWithObservations: + dsObs = open_multifile_dataset( + fileNames=obsFileNames['iceArea'][hemisphere], + calendar=calendar, + config=config, + timeVariableName='xtime') + key = (hemisphere, 'iceArea') + obs[key] = replicate_cycle(plotVars[key], dsObs.IceArea, calendar) + + key = (hemisphere, 'iceVolume') + if hemisphere == 'NH': + dsObs = open_multifile_dataset( + fileNames=obsFileNames['iceVolume'][hemisphere], + calendar=calendar, + config=config, + timeVariableName='xtime') + obs[key] = replicate_cycle(plotVars[key], dsObs.IceVol, + calendar) + else: + obs[key] = None + + if preprocessedReferenceRunName != 'None': + inFilesPreprocessed = '{}/icearea.{}.year*.nc'.format( + preprocessedReferenceDirectory, + preprocessedReferenceRunName) + dsPreprocessed = open_multifile_dataset( + fileNames=inFilesPreprocessed, + calendar=calendar, + config=config, + timeVariableName='xtime') + dsPreprocessedTimeSlice = dsPreprocessed.sel( + Time=slice(timeStart, timeEnd)) + key = (hemisphere, 'iceArea') + preprocessed[key] = dsPreprocessedTimeSlice[ + 'icearea_{}'.format(hemisphere.lower())] + + inFilesPreprocessed = '{}/icevol.{}.year*.nc'.format( + preprocessedReferenceDirectory, + preprocessedReferenceRunName) + dsPreprocessed = open_multifile_dataset( + fileNames=inFilesPreprocessed, + calendar=calendar, + config=config, + timeVariableName='xtime') + dsPreprocessedTimeSlice = dsPreprocessed.sel( + Time=slice(timeStart, timeEnd)) + key = (hemisphere, 'iceVolume') + preprocessed[key] = dsPreprocessedTimeSlice[ + 'icevolume_{}'.format(hemisphere.lower())] + + for variableName in ['iceArea', 'iceVolume']: + key = (hemisphere, variableName) + if compareWithObservations: + if preprocessedReferenceRunName != 'None': + plotVars[key] = [plotVars[key], obs[key], + preprocessed[key]] lineStyles = ['r-', 'k-', 'b-'] lineWidths = [1.2, 1.2, 1.2] else: # 
just v1 model and obs - vars_nh = [var_nh, var_nh_obs] - vars_sh = [var_sh, var_sh_obs] + plotVars[key] = [plotVars[key], obs[key]] lineStyles = ['r-', 'k-'] lineWidths = [1.2, 1.2] - elif ref_casename_v0 != 'None': + elif preprocessedReferenceRunName != 'None': # just v1 and v0 models - vars_nh = [var_nh, var_nh_v0] - vars_sh = [var_sh, var_sh_v0] + plotVars[key] = [plotVars[key], preprocessed[key]] lineStyles = ['r-', 'b-'] lineWidths = [1.2, 1.2] - if compare_with_obs or ref_casename_v0 != 'None': + if (compareWithObservations or + preprocessedReferenceRunName != 'None'): # separate plots for nothern and southern hemispheres - timeseries_analysis_plot(config, vars_nh, N_movavg, title_nh, - xlabel, units, figname_nh, + timeseries_analysis_plot(config, plotVars[key], + movingAveragePoints, + title[key], xLabel, + units[variableName], + figureNameStd[key], lineStyles=lineStyles, lineWidths=lineWidths, - title_font_size=title_font_size) - timeseries_analysis_plot(config, vars_sh, N_movavg, title_sh, - xlabel, units, figname_sh, - lineStyles=lineStyles, - lineWidths=lineWidths, - title_font_size=title_font_size) - else: - # we will combine north and south onto a single graph - figname = '{}/{}.{}.png'.format(plots_dir, casename, varname) - title = '{}, NH (r), SH (k)\n{}'.format(plot_title, casename) - timeseries_analysis_plot(config, [var_nh, var_sh], N_movavg, - title, xlabel, units, figname, - lineStyles=['r-', 'k-'], - lineWidths=[1.2, 1.2], - title_font_size=title_font_size) - - elif varname == 'iceThickCell': - - figname = '{}/{}.{}.png'.format(plots_dir, casename, varname) - title = '{} NH (r), SH (k)\n{}'.format(plot_title, casename) - timeseries_analysis_plot(config, [var_nh, var_sh], N_movavg, title, - xlabel, units, figname, + titleFontSize=titleFontSize, + calendar=calendar) + if (polarPlot): + timeseries_analysis_plot_polar(config, plotVars[key], + movingAveragePoints, + title[key], + figureNamePolar[key], + lineStyles=lineStyles, + lineWidths=lineWidths, + titleFontSize=titleFontSize, + calendar=calendar) + if (not compareWithObservations and + preprocessedReferenceRunName == 'None'): + for variableName in ['iceArea', 'iceVolume']: + # we will combine north and south onto a single graph + figureNameStd = '{}/{}.{}.png'.format(plotsDirectory, + mainRunName, + variableName) + figureNamePolar = '{}/{}.{}_polar.png'.format(plotsDirectory, + mainRunName, + variableName) + title = '{}, NH (r), SH (k)\n{}'.format(plotTitles[variableName], + mainRunName) + varList = [plotVars[('NH', variableName)], + plotVars[('SH', variableName)]] + timeseries_analysis_plot(config, varList, + movingAveragePoints, + title, xLabel, units[variableName], + figureNameStd, lineStyles=['r-', 'k-'], lineWidths=[1.2, 1.2], - title_font_size=title_font_size) + titleFontSize=titleFontSize, + calendar=calendar) + if (polarPlot): + timeseries_analysis_plot_polar(config, varList, + movingAveragePoints, + title, figureNamePolar, + lineStyles=['r-', 'k-'], + lineWidths=[1.2, 1.2], + titleFontSize=titleFontSize, + calendar=calendar) + + +def replicate_cycle(ds, dsToReplicate, calendar): + """ + Replicates a periodic time series `dsToReplicate` to cover the timeframe + of the dataset `ds`. - else: - raise ValueError( - 'varname variable {} not supported for plotting'.format( - varname)) + Parameters + ---------- + ds : dataset used to find the start and end time of the replicated cycle + + dsToReplicate : dataset to replicate. 
The period of the cycle is the + length of dsToReplicate plus the time between the first two time + values (typically one year total). + + calendar : {'gregorian', 'gregorian_noleap'} + The name of one of the calendars supported by MPAS cores + + Returns: + -------- + dsShift : a cyclicly repeated version of `dsToReplicte` covering the range + of time of `ds`. + + Authors + ------- + Xylar Asay-Davis, Milena Veneziani + + Last Modified + ------------- + 02/22/2017 + """ + dsStartTime = days_to_datetime(ds.Time.min(), calendar=calendar) + dsEndTime = days_to_datetime(ds.Time.max(), calendar=calendar) + repStartTime = days_to_datetime(dsToReplicate.Time.min(), + calendar=calendar) + repEndTime = days_to_datetime(dsToReplicate.Time.max(), + calendar=calendar) + + repSecondTime = days_to_datetime(dsToReplicate.Time.isel(Time=1), + calendar=calendar) + + period = (MpasRelativeDelta(repEndTime, repStartTime) + + MpasRelativeDelta(repSecondTime, repStartTime)) + + startIndex = 0 + while(dsStartTime > repStartTime + (startIndex+1)*period): + startIndex += 1 + endIndex = 0 + while(dsEndTime > repEndTime + endIndex*period): + endIndex += 1 -def replicate_cycle(ds, ds_toreplicate): - dsshift = ds_toreplicate.copy() - shiftT = ((dsshift.Time.max() - dsshift.Time.min()) + - (dsshift.Time.isel(Time=1) - dsshift.Time.isel(Time=0))) - startIndex = int(np.floor((ds.Time.min()-ds_toreplicate.Time.min())/shiftT)) - endIndex = int(np.ceil((ds.Time.max()-ds_toreplicate.Time.min())/shiftT)) - dsshift['Time'] = dsshift['Time'] + startIndex*shiftT + dsShift = dsToReplicate.copy() + times = days_to_datetime(dsShift.Time, calendar=calendar) + dsShift.coords['Time'] = ('Time', + datetime_to_days(times + startIndex*period, + calendar=calendar)) # replicate cycle: for cycleIndex in range(startIndex, endIndex): - dsnew = ds_toreplicate.copy() - dsnew['Time'] = dsnew['Time'] + (cycleIndex+1)*shiftT - dsshift = xr.concat([dsshift, dsnew], dim='Time') - # constrict replicated ds_short to same time dimension as ds_long: - dsshift = dsshift.sel(Time=ds.Time.values, method='nearest') - return dsshift + dsNew = dsToReplicate.copy() + dsNew.coords['Time'] = ('Time', + datetime_to_days(times + (cycleIndex+1)*period, + calendar=calendar)) + dsShift = xr.concat([dsShift, dsNew], dim='Time') + + # clip dsShift to the range of ds + dsStartTime = dsShift.Time.sel(Time=ds.Time.min(), method='nearest').values + dsEndTime = dsShift.Time.sel(Time=ds.Time.max(), method='nearest').values + dsShift = dsShift.sel(Time=slice(dsStartTime, dsEndTime)) + + return dsShift diff --git a/mpas_analysis/sea_ice/utility.py b/mpas_analysis/sea_ice/utility.py new file mode 100644 index 000000000..2c5754ba6 --- /dev/null +++ b/mpas_analysis/sea_ice/utility.py @@ -0,0 +1,107 @@ +''' +common utility functions for sea ice analysis tasks + +Xylar Asay-Davis + +Last Modified: 04/03/2017 +''' + +from ..shared.io import StreamsFile +from ..shared.io.utility import build_config_full_path + +from ..shared.timekeeping.utility import get_simulation_start_time + +from ..shared.analysis_task import setup_task + + +def setup_sea_ice_task(config): # {{{ + ''' + Perform steps to set up the sea ice analysis (e.g. reading namelists and + streams files, finding a restart file). 
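+
+    A typical call, mirroring how the sea-ice analysis tasks use this
+    helper, is::
+
+        namelist, runStreams, historyStreams, calendar, namelistMap, \
+            streamMap, variableMap, plotsDirectory, simulationStartTime, \
+            restartFileName = setup_sea_ice_task(config)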
+ + Parameters + ---------- + config : instance of MpasAnalysisConfigParser + Contains configuration options + + Returns + ------- + namelist : NameList object + for parsing namelist options + + runStreams : StreamsFile object + for parsing the streams file related to output in the run subdirectory + + historyStreams : StreamsFile object + for parsing the streams file related to output in the history + subdirectory + + calendar: {'gregorian', 'gregorian_noleap'} + The name of the calendars used in the MPAS run + + streamMap : dict + A dictionary of MPAS stream names that map to their mpas_analysis + counterparts. + + variableMap : dict + A dictionary of MPAS variable names that map to their mpas_analysis + counterparts. + + plotsDirectory : str + the directories for writing plots + + simulationStartTime : str + the date and time of the start of the simulation + + restartFileName : str + the path to a restart file (which may be an MPAS-O restart file is no + MPAS-SeaIce restart could be found) + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/03/2017 + ''' + # perform common setup for the task + namelist, runStreams, historyStreams, calendar, namelistMap, streamMap, \ + variableMap, plotsDirectory = setup_task(config, + componentName='seaIce') + + try: + simulationStartTime = get_simulation_start_time(runStreams) + except IOError: + # try the ocean stream instead + runDirectory = build_config_full_path(config, 'input', + 'runSubdirectory') + oceanStreamsFileName = build_config_full_path( + config, 'input', 'oceanStreamsFileName') + oceanStreams = StreamsFile(oceanStreamsFileName, + streamsdir=runDirectory) + simulationStartTime = get_simulation_start_time(oceanStreams) + + try: + restartFileName = runStreams.readpath('restart')[0] + except ValueError: + # get an ocean restart file, since no sea-ice restart exists + try: + runDirectory = build_config_full_path(config, 'input', + 'runSubdirectory') + oceanStreamsFileName = build_config_full_path( + config, 'input', 'oceanStreamsFileName') + oceanStreams = StreamsFile(oceanStreamsFileName, + streamsdir=runDirectory) + restartFileName = oceanStreams.readpath('restart')[0] + except ValueError: + raise IOError('No MPAS-O or MPAS-Seaice restart file found: need ' + 'at least one restart file for seaice_timeseries ' + 'calculation') + + return namelist, runStreams, historyStreams, calendar, namelistMap, \ + streamMap, variableMap, plotsDirectory, simulationStartTime, \ + restartFileName + # }}} + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python \ No newline at end of file diff --git a/mpas_analysis/sea_ice/variable_stream_map.py b/mpas_analysis/sea_ice/variable_stream_map.py index 4be6852dc..114f5e071 100644 --- a/mpas_analysis/sea_ice/variable_stream_map.py +++ b/mpas_analysis/sea_ice/variable_stream_map.py @@ -1,5 +1,18 @@ -# mappings of stream names from various MPAS-SI versions to those in -# mpas_analysis +''' +Mappings of namelist options, stream names and variable names from various +MPAS-SeaIce versions to those used by mpas_analysis + +Authors +------- +Xylar Asay-Davis + +Last Modified +------------- +03/29/2017 +''' + +seaIceNamelistMap = {} + seaIceStreamMap = {'timeSeriesStats': ['timeSeriesStatsMonthlyOutput']} diff --git a/mpas_analysis/shared/analysis_task.py b/mpas_analysis/shared/analysis_task.py new file mode 100644 index 000000000..7e777822d --- /dev/null +++ b/mpas_analysis/shared/analysis_task.py @@ -0,0 +1,184 @@ +''' +common utility functions for analysis tasks + +Xylar Asay-Davis + 
+Last Modified: 03/23/2017 +''' + +import warnings + +from .io import NameList, StreamsFile +from .io.utility import build_config_full_path, make_directories + +from ..ocean.variable_stream_map import oceanNamelistMap, oceanStreamMap, \ + oceanVariableMap + +from ..sea_ice.variable_stream_map import seaIceNamelistMap, seaIceStreamMap, \ + seaIceVariableMap + + +def setup_task(config, componentName): # {{{ + ''' + Perform steps to set up the analysis (e.g. reading namelists and + streams files). + + Parameters + ---------- + config : instance of MpasAnalysisConfigParser + Contains configuration options + + componentName : {'ocean', 'seaIce', 'landIce'} + The name of a MPAS core to be analyized + + Returns + ------- + namelist : NameList object + for parsing namelist options + + runStreams : StreamsFile object + for parsing the streams file related to output in the run subdirectory + + historyStreams : StreamsFile object + for parsing the streams file related to output in the history + subdirectory + + calendar: {'gregorian', 'gregorian_noleap'} + The name of the calendars used in the MPAS run + + namelistMap : dict + A dictionary of MPAS namelist options that map to their mpas_analysis + counterparts. + + streamMap : dict + A dictionary of MPAS stream names that map to their mpas_analysis + counterparts. + + variableMap : dict + A dictionary of MPAS variable names that map to their mpas_analysis + counterparts. + + plotsDirectory : str + the directories for writing plots + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 03/23/2017 + ''' + + # read parameters from config file + # the run directory contains the restart files + runDirectory = build_config_full_path(config, 'input', 'runSubdirectory') + # if the history directory exists, use it; if not, fall back on + # runDirectory + historyDirectory = build_config_full_path( + config, 'input', '{}HistorySubdirectory'.format(componentName), + defaultPath=runDirectory) + + namelistFileName = build_config_full_path( + config, 'input', '{}NamelistFileName'.format(componentName)) + namelist = NameList(namelistFileName) + + streamsFileName = build_config_full_path( + config, 'input', '{}StreamsFileName'.format(componentName)) + runStreams = StreamsFile(streamsFileName, streamsdir=runDirectory) + historyStreams = StreamsFile(streamsFileName, streamsdir=historyDirectory) + + calendar = namelist.get('config_calendar_type') + + plotsDirectory = build_config_full_path(config, 'output', + 'plotsSubdirectory') + make_directories(plotsDirectory) + + if componentName == 'ocean': + namelistMap = oceanNamelistMap + streamMap = oceanStreamMap + variableMap = oceanVariableMap + elif componentName == 'seaIce': + namelistMap = seaIceNamelistMap + streamMap = seaIceStreamMap + variableMap = seaIceVariableMap + else: + namelistMap = None + streamMap = None + variableMap = None + + return namelist, runStreams, historyStreams, calendar, namelistMap, \ + streamMap, variableMap, plotsDirectory # }}} + + +def check_analysis_enabled(namelist, analysisOptionName, namelistMap=None, + default=False, raiseException=True): + ''' + Check to make sure a given analysis is turned on, issuing a warning or + raising an exception if not. 
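+
+    A minimal sketch of the intended usage (the namelist option name below is
+    hypothetical)::
+
+        enabled = check_analysis_enabled(
+            namelist, 'config_am_timeseriesstatsmonthly_enable',
+            namelistMap=namelistMap, raiseException=True)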
+ + Parameters + ---------- + namelist : NameList object + for parsing namelist options + + analysisOptionName : str + The name of a boolean namelist option indicating whether the given + analysis member is enabled + + namelistMap : dict, optional + A dictionary of MPAS namelist options that map to their mpas_analysis + counterparts. + + default : bool, optional + If no analysis option with the given name can be found, indicates + whether the given analysis is assumed to be enabled by default. + + raiseException : bool, optional + Whether + + Returns + ------- + enabled : bool + Whether the given analysis is enabled + + Raises + ------ + RuntimeError + If the given analysis option is not found and ``default`` is not + ``True`` or if the analysis option is found and is ``False``. The + exception is only raised if ``raiseException = True``. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/01/2017 + ''' + + try: + if namelistMap is None: + optionName = analysisOptionName + else: + optionName = namelist.find_option(namelistMap[analysisOptionName]) + enabled = namelist.getbool(optionName) + except ValueError: + enabled = default + if default: + message = 'WARNING: namelist option {} not found.\n' \ + 'This likely indicates that the simulation you are ' \ + 'analyzing was run with an\n' \ + 'older version of MPAS-O that did not support this ' \ + 'flag. Assuming enabled.'.format(analysisOptionName) + warnings.warn(message) + + if not enabled and raiseException: + raise RuntimeError('*** MPAS-Analysis relies on {} = .true.\n' + '*** Make sure to enable this analysis ' + 'member.'.format(analysisOptionName)) + + return enabled + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/climatology/__init__.py b/mpas_analysis/shared/climatology/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/mpas_analysis/shared/climatology/climatology.py b/mpas_analysis/shared/climatology/climatology.py new file mode 100644 index 000000000..52d912645 --- /dev/null +++ b/mpas_analysis/shared/climatology/climatology.py @@ -0,0 +1,992 @@ +""" +Functions for creating climatologies from monthly time series data + +Authors +------- +Xylar Asay-Davis + +Last Modified +------------- +04/08/2017 +""" + +import xarray as xr +import os +import numpy +import netCDF4 +import warnings + +from ..constants import constants + +from ..timekeeping.utility import days_to_datetime + +from ..io.utility import build_config_full_path, make_directories + +from ..interpolation import interpolate + + +def write_mpas_mapping_file(config, meshFileName): + """ + Given config options, the name of the component being analyzed and an + MPAS mesh file, either finds an existing MPAS-to-comparison-grid mapping + file or creates a new mapping file. + + Parameters + ---------- + config : instance of MpasAnalysisConfigParser + Contains configuration options + + meshFileName : str + The path of the file containing the source MPAS mesh + + Returns + ------- + mpasMappingFileName : str + The absolute path to an existing mapping file or the location + at which one was created. 
The mapping file can be used to + interpolate between MPAS meshes and the climatology comparison grid + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 03/04/2017 + """ + + climSection = 'climatology' + + method = config.getWithDefault(climSection, 'mpasInterpolationMethod', + 'bilinear') + + comparisonLatRes = config.getWithDefault(climSection, + 'comparisonLatResolution', + constants.dLatitude) + comparisonLonRes = config.getWithDefault(climSection, + 'comparisonLatResolution', + constants.dLongitude) + + (lat, lon) = _get_comparison_lat_lon(comparisonLatRes, comparisonLonRes) + + overwriteMapping = config.getWithDefault(climSection, + 'overwriteMapping', + False) + + mappingFileOption = 'mpasMappingFile' + if config.has_option(climSection, mappingFileOption): + # a mapping file was supplied, so we'll use that name + mpasMappingFileName = config.get(climSection, mappingFileOption) + else: + # we need to build the path to the mapping file and an appropriate + # file name + mappingSubdirectory = build_config_full_path(config, 'output', + 'mappingSubdirectory') + + make_directories(mappingSubdirectory) + + meshName = config.get('input', 'mpasMeshName') + + mpasMappingFileName = '{}/map_{}_to_{}x{}degree_{}.nc'.format( + mappingSubdirectory, meshName, comparisonLatRes, comparisonLonRes, + method) + + config.set(climSection, mappingFileOption, mpasMappingFileName) + + interpolate.build_remap_weights(sourceFileName=meshFileName, + outWeightFileName=mpasMappingFileName, + sourceFileType='mpas', + destinationLat=lat, + destinationLon=lon, + desitnationUnits='degrees', + method=method, + overwrite=overwriteMapping) + + return mpasMappingFileName + + +def write_observations_mapping_file(config, componentName, fieldName, + gridFileName, latVarName='lat', + lonVarName='lon'): + """ + Given config options, the name of the component being analyzed and a + grid file containing 1D lat and lon arrays, either finds an existing + obs-grid-to-comparison-grid mapping file or creates a new mapping file. + + Parameters + ---------- + config : instance of MpasAnalysisConfigParser + Contains configuration options + + componentName : {'ocean', 'seaIce'} + Name of the component, used to look up climatology and observation + options + + fieldName : str + Name of the field being mapped, used to give each set of + observation weights a unique name. 
+ + gridFileName : str + The path of the file containing the source lat-lon grid + + latVarName, lonVarName : str, optional + The name of the latitude and longitude variables in the source grid + file + + Returns + ------- + obsMappingFileName : str + The absolute path to a mapping file (or the location + at which to create one) for interpolation between MPAS meshes and + the climatology comparison grid + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 03/04/2017 + """ + + mappingFileOption = '{}ClimatologyMappingFile'.format(fieldName) + climSection = 'climatology' + obsSection = '{}Observations'.format(componentName) + + comparisonLatRes = config.getWithDefault(climSection, + 'comparisonLatResolution', + constants.dLatitude) + comparisonLonRes = config.getWithDefault(climSection, + 'comparisonLatResolution', + constants.dLongitude) + + (outLat, outLon) = _get_comparison_lat_lon(comparisonLatRes, + comparisonLonRes) + + method = config.getWithDefault(obsSection, 'interpolationMethod', + 'bilinear') + + overwriteMapping = config.getWithDefault(climSection, + 'overwriteMapping', + False) + + if config.has_option(obsSection, mappingFileOption): + obsMappingFileName = config.get(obsSection, mappingFileOption) + else: + + (gridName, matchesComparison) = _get_grid_name(gridFileName, + latVarName, + lonVarName, + comparisonLatRes, + comparisonLonRes) + + if matchesComparison: + # no need to remap the observations + obsMappingFileName = None + else: + mappingSubdirectory = build_config_full_path(config, 'output', + 'mappingSubdirectory') + + make_directories(mappingSubdirectory) + + obsMappingFileName = \ + '{}/map_obs_{}_{}_to_{}x{}degree_{}.nc'.format( + mappingSubdirectory, fieldName, gridName, + comparisonLatRes, comparisonLonRes, method) + + config.set(obsSection, mappingFileOption, obsMappingFileName) + + if obsMappingFileName is not None: + interpolate.build_remap_weights( + sourceFileName=gridFileName, + outWeightFileName=obsMappingFileName, + sourceFileType='latlon', + sourceLatVarName=latVarName, + sourceLonVarName=lonVarName, + destinationLat=outLat, + destinationLon=outLon, + desitnationUnits='degrees', + method=method, + overwrite=overwriteMapping) + + return obsMappingFileName + + +def get_mpas_climatology_file_names(config, fieldName, monthNames): + """ + Given config options, the name of a field and a string identifying the + months in a seasonal climatology, returns the full path for MPAS + climatology files before and after regridding. + + Parameters + ---------- + config : instance of MpasAnalysisConfigParser + Contains configuration options + + fieldName : str + Name of the field being mapped, used as a prefix for the climatology + file name. + + monthNames : str + A string identifying the months in a seasonal climatology (e.g. 'JFM') + + Returns + ------- + climatologyFileName : str + The absolute path to a file where the climatology should be stored + before regridding. + + climatologyPrefix : str + The prfix including absolute path for climatology cache files before + regridding. + + regriddedFileName : str + The absolute path to a file where the climatology should be stored + after regridding. 
+ + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 03/03/2017 + """ + + climSection = 'climatology' + startYear = config.getint(climSection, 'startYear') + endYear = config.getint(climSection, 'endYear') + + meshName = config.get('input', 'mpasMeshName') + + comparisonLatRes = config.getWithDefault(climSection, + 'comparisonLatResolution', + constants.dLatitude) + comparisonLonRes = config.getWithDefault(climSection, + 'comparisonLatResolution', + constants.dLongitude) + + climatologyDirectory = build_config_full_path( + config, 'output', 'mpasClimatologySubdirectory') + + regriddedDirectory = build_config_full_path( + config, 'output', 'mpasRegriddedClimSubdirectory') + + make_directories(regriddedDirectory) + make_directories(climatologyDirectory) + + climatologyPrefix = '{}/{}_{}_{}'.format(climatologyDirectory, fieldName, + meshName, monthNames) + climatologyFileName = '{}_years{:04d}-{:04d}.nc'.format(climatologyPrefix, + startYear, + endYear) + regriddedFileName = \ + '{}/{}_{}_to_{}x{}degree_{}_years{:04d}-{:04d}.nc'.format( + regriddedDirectory, fieldName, meshName, comparisonLatRes, + comparisonLonRes, monthNames, startYear, endYear) + + return (climatologyFileName, climatologyPrefix, regriddedFileName) + + +def get_observation_climatology_file_names(config, fieldName, monthNames, + componentName, gridFileName, + latVarName='lat', lonVarName='lon'): + """ + Given config options, the name of a field and a string identifying the + months in a seasonal climatology, returns the full path for observation + climatology files before and after regridding. + + Parameters + ---------- + config : instance of MpasAnalysisConfigParser + Contains configuration options + + fieldName : str + Name of the field being mapped, used as a prefix for the climatology + file name. + + monthNames : str + A string identifying the months in a seasonal climatology (e.g. 'JFM') + + gridFileName : str + The path of the file containing the source lat-lon grid + + latVarName, lonVarName : str, optional + The name of the latitude and longitude variables in the source grid + file + + Returns + ------- + climatologyFileName : str + The absolute path to a file where the climatology should be stored + before regridding. + + regriddedFileName : str + The absolute path to a file where the climatology should be stored + after regridding. 
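+
+    For illustration, the regridded sea-ice thickness plots call this roughly
+    as::
+
+        (climatologyFileName, regriddedFileName) = \
+            climatology.get_observation_climatology_file_names(
+                config=config, fieldName='iceThickness_NH', monthNames='FM',
+                componentName='seaIce', gridFileName=obsFileName,
+                latVarName='t_lat', lonVarName='t_lon')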
+ + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 03/03/2017 + """ + + climSection = 'climatology' + obsSection = '{}Observations'.format(componentName) + + comparisonLatRes = config.getWithDefault(climSection, + 'comparisonLatResolution', + constants.dLatitude) + comparisonLonRes = config.getWithDefault(climSection, + 'comparisonLatResolution', + constants.dLongitude) + + climatologyDirectory = build_config_full_path( + config=config, section='output', + relativePathOption='climatologySubdirectory', + relativePathSection=obsSection) + + regriddedDirectory = build_config_full_path( + config=config, section='output', + relativePathOption='regriddedClimSubdirectory', + relativePathSection=obsSection) + + (gridName, matchesComparison) = _get_grid_name(gridFileName, + latVarName, + lonVarName, + comparisonLatRes, + comparisonLonRes) + + climatologyFileName = '{}/{}_{}_{}.nc'.format( + climatologyDirectory, fieldName, gridName, monthNames) + regriddedFileName = '{}/{}_{}_to_{}x{}degree_{}.nc'.format( + regriddedDirectory, fieldName, gridName, comparisonLatRes, + comparisonLonRes, monthNames) + + make_directories(climatologyDirectory) + + if not matchesComparison: + make_directories(regriddedDirectory) + + return (climatologyFileName, regriddedFileName) + + +def compute_monthly_climatology(ds, calendar=None, maskVaries=True): + """ + Compute monthly climatologies from a data set. The mean is weighted but + the number of days in each month of the data set, ignoring values masked + out with NaNs. If the month coordinate is not present, a data array + ``month`` will be added based on ``Time`` and the provided calendar. + + Parameters + ---------- + ds : ``xarray.Dataset`` or ``xarray.DataArray`` object + A data set with a ``Time`` coordinate expressed as days since + 0001-01-01 or ``month`` coordinate + + calendar : ``{'gregorian', 'gregorian_noleap'}``, optional + The name of one of the calendars supported by MPAS cores, used to + determine ``month`` from ``Time`` coordinate, so must be supplied if + ``ds`` does not already have a ``month`` coordinate or data array + + maskVaries: bool, optional + If the mask (where variables in ``ds`` are ``NaN``) varies with time. + If not, the weighted average does not need make extra effort to account + for the mask. Most MPAS fields will have masks that don't vary in + time, whereas observations may sometimes be present only at some + times and not at others, requiring ``maskVaries = True``. + + Returns + ------- + climatology : object of same type as ``ds`` + A data set without the ``'Time'`` coordinate containing the mean + of ds over all months in monthValues, weighted by the number of days + in each month. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + """ + + def compute_one_month_climatology(ds): + monthValues = list(ds.month.values) + return compute_climatology(ds, monthValues, calendar, maskVaries) + + ds = add_years_months_days_in_month(ds, calendar) + + monthlyClimatology = \ + ds.groupby('month').apply(compute_one_month_climatology) + + return monthlyClimatology + + +def compute_climatology(ds, monthValues, calendar=None, maskVaries=True): + """ + Compute a monthly, seasonal or annual climatology data set from a data + set. The mean is weighted but the number of days in each month of + the data set, ignoring values masked out with NaNs. If the month + coordinate is not present, a data array ``month`` will be added based + on ``Time`` and the provided calendar. 
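+
+    For instance, a DJF climatology could be computed as (an illustrative
+    sketch)::
+
+        djfClimatology = compute_climatology(ds, monthValues=[12, 1, 2],
+                                             calendar='gregorian_noleap')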
+ + Parameters + ---------- + ds : ``xarray.Dataset`` or ``xarray.DataArray`` object + A data set with a ``Time`` coordinate expressed as days since + 0001-01-01 or ``month`` coordinate + + monthValues : int or array-like of ints + A single month or an array of months to be averaged together + + calendar : ``{'gregorian', 'gregorian_noleap'}``, optional + The name of one of the calendars supported by MPAS cores, used to + determine ``month`` from ``Time`` coordinate, so must be supplied if + ``ds`` does not already have a ``month`` coordinate or data array + + maskVaries: bool, optional + If the mask (where variables in ``ds`` are ``NaN``) varies with time. + If not, the weighted average does not need make extra effort to account + for the mask. Most MPAS fields will have masks that don't vary in + time, whereas observations may sometimes be present only at some + times and not at others, requiring ``maskVaries = True``. + + Returns + ------- + climatology : object of same type as ``ds`` + A data set without the ``'Time'`` coordinate containing the mean + of ds over all months in monthValues, weighted by the number of days + in each month. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + """ + + ds = add_years_months_days_in_month(ds, calendar) + + mask = xr.zeros_like(ds.month, bool) + + for month in monthValues: + mask = xr.ufuncs.logical_or(mask, ds.month == month) + + climatologyMonths = ds.where(mask, drop=True) + + climatology = _compute_masked_mean(climatologyMonths, maskVaries) + + return climatology + + +def cache_climatologies(ds, monthValues, config, cachePrefix, calendar, + printProgress=False): # {{{ + ''' + Cache NetCDF files for each year of an annual climatology, and then use + the cached files to compute a climatology for the full range of years. + The start and end years of the climatology are taken from ``config``, and + are updated in ``config`` if the data set ``ds`` doesn't contain this + full range. + + Note: only works with climatologies where the mask (locations of ``NaN`` + values) doesn't vary with time. + + Parameters + ---------- + ds : ``xarray.Dataset`` or ``xarray.DataArray`` object + A data set with a ``Time`` coordinate expressed as days since + 0001-01-01 + + monthValues : int or array-like of ints + A single month or an array of months to be averaged together + + config : instance of MpasAnalysisConfigParser + Contains configuration options + + cachePrefix : str + The file prefix (including path) to which the year (or years) will be + appended as cache files are stored + + calendar : ``{'gregorian', 'gregorian_noleap'}`` + The name of one of the calendars supported by MPAS cores, used to + determine ``year`` and ``month`` from ``Time`` coordinate + + printProgress: bool, optional + Whether progress messages should be printed as the climatology is + computed + + Returns + ------- + climatology : object of same type as ``ds`` + A data set without the ``'Time'`` coordinate containing the mean + of ds over all months in monthValues, weighted by the number of days + in each month. 
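+
+    For illustration, the sea-ice climatology plotting calls this roughly
+    as::
+
+        seasonalClimatology = climatology.cache_climatologies(
+            ds, monthValues, config, climatologyPrefix, calendar,
+            printProgress=True)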
+
+    Authors
+    -------
+    Xylar Asay-Davis
+
+    Last Modified
+    -------------
+    04/11/2017
+    '''
+    startYearClimo = config.getint('climatology', 'startYear')
+    endYearClimo = config.getint('climatology', 'endYear')
+    yearsPerCacheFile = config.getint('climatology', 'yearsPerCacheFile')
+
+    if printProgress:
+        print '   Computing and caching climatologies covering {}-year ' \
+            'spans...'.format(yearsPerCacheFile)
+
+    ds = add_years_months_days_in_month(ds, calendar)
+
+    cacheInfo, cacheIndices = _setup_climatology_caching(ds, startYearClimo,
+                                                         endYearClimo,
+                                                         yearsPerCacheFile,
+                                                         cachePrefix,
+                                                         monthValues)
+
+    ds = ds.copy()
+    ds.coords['cacheIndices'] = ('Time', cacheIndices)
+
+    # compute and store each cache file with interval yearsPerCacheFile
+    _cache_individual_climatologies(ds, cacheInfo, printProgress,
+                                    yearsPerCacheFile, monthValues,
+                                    calendar)
+
+    # compute the aggregate climatology
+    climatology = _cache_aggregated_climatology(startYearClimo, endYearClimo,
+                                                cachePrefix, printProgress,
+                                                monthValues, cacheInfo)
+
+    return climatology  # }}}
+
+
+def update_start_end_year(ds, config, calendar):
+    """
+    Determine the start and end years of the ``Time`` coordinate in ``ds``,
+    compare them with the climatology start and end years requested in
+    ``config``, and update the config options (with a warning) if the
+    requested range is not fully available in the data set.
+
+    Parameters
+    ----------
+    ds : instance of xarray.Dataset
+        A data set from which start and end years will be determined
+
+    config : instance of MpasAnalysisConfigParser
+        Contains configuration options
+
+    calendar : {'gregorian', 'gregorian_noleap'}
+        The name of one of the calendars supported by MPAS cores
+
+    Returns
+    -------
+    changed : bool
+        Whether the start and end years were changed
+
+    startYear, endYear : int
+        The start and end years of the data set
+
+    Authors
+    -------
+    Xylar Asay-Davis
+
+    Last Modified
+    -------------
+    03/25/2017
+    """
+    requestedStartYear = config.getint('climatology', 'startYear')
+    requestedEndYear = config.getint('climatology', 'endYear')
+
+    startYear = days_to_datetime(ds.Time.min().values, calendar=calendar).year
+    endYear = days_to_datetime(ds.Time.max().values, calendar=calendar).year
+    changed = False
+    if startYear != requestedStartYear or endYear != requestedEndYear:
+        message = "climatology start and/or end year different from " \
+                  "requested\n" \
+                  "requested: {:04d}-{:04d}\n" \
+                  "actual:    {:04d}-{:04d}\n".format(requestedStartYear,
+                                                      requestedEndYear,
+                                                      startYear,
+                                                      endYear)
+        warnings.warn(message)
+        config.set('climatology', 'startYear', str(startYear))
+        config.set('climatology', 'endYear', str(endYear))
+        changed = True
+
+    return changed, startYear, endYear
+
+
+def add_years_months_days_in_month(ds, calendar=None):  # {{{
+    '''
+    Add ``year``, ``month`` and ``daysInMonth`` as data arrays in ``ds``.
+    The number of days in each month of ``ds`` is computed either using the
+    ``startTime`` and ``endTime`` if available or assuming ``gregorian_noleap``
+    calendar and ignoring leap years.  ``year`` and ``month`` are computed
+    accounting correctly for the calendar.
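+
+    For example (an illustrative sketch)::
+
+        ds = add_years_months_days_in_month(ds, calendar='gregorian_noleap')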
+ + Parameters + ---------- + ds : ``xarray.Dataset`` or ``xarray.DataArray`` object + A data set with a ``Time`` coordinate expressed as days since + 0001-01-01 + + calendar : ``{'gregorian', 'gregorian_noleap'}``, optional + The name of one of the calendars supported by MPAS cores, used to + determine ``year`` and ``month`` from ``Time`` coordinate + + Returns + ------- + ds : object of same type as ``ds`` + The data set with ``year``, ``month`` and ``daysInMonth`` data arrays + added (if not already present) + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + ''' + + if ('year' in ds.coords and 'month' in ds.coords and + 'daysInMonth' in ds.coords): + return ds + + ds = ds.copy() + + if 'year' not in ds.coords or 'month' not in ds.coords: + if calendar is None: + raise ValueError('calendar must be provided if month and year ' + 'coordinate is not in ds.') + datetimes = days_to_datetime(ds.Time, calendar=calendar) + + if 'year' not in ds.coords: + ds.coords['year'] = ('Time', [date.year for date in datetimes]) + + if 'month' not in ds.coords: + ds.coords['month'] = ('Time', [date.month for date in datetimes]) + + if 'daysInMonth' not in ds.coords: + if 'startTime' in ds.coords and 'endTime' in ds.coords: + ds.coords['daysInMonth'] = ds.endTime - ds.startTime + else: + if calendar == 'gregorian': + message = 'The MPAS run used the Gregorian calendar but ' \ + 'does not appear to have\n' \ + 'supplied start and end times. Climatologies ' \ + 'will be computed with\n' \ + 'month durations ignoring leap years.' + warnings.warn(message) + + daysInMonth = numpy.array([constants.daysInMonth[month-1] for + month in ds.month.values], float) + ds.coords['daysInMonth'] = ('Time', daysInMonth) + + return ds # }}} + + +def _compute_masked_mean(ds, maskVaries): + ''' + Compute the time average of data set, masked out where the variables in ds + are NaN and, if ``maskVaries == True``, weighting by the number of days + used to compute each monthly mean time in ds. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + ''' + def ds_to_weights(ds): + # make an identical data set to ds but replacing all data arrays with + # nonnull applied to that data array + weights = ds.copy(deep=True) + if isinstance(ds, xr.core.dataarray.DataArray): + weights = ds.notnull() + elif isinstance(ds, xr.core.dataset.Dataset): + for var in ds.data_vars: + weights[var] = ds[var].notnull() + else: + raise TypeError('ds must be an instance of either xarray.Dataset ' + 'or xarray.DataArray.') + + return weights + + if maskVaries: + dsWeightedSum = (ds * ds.daysInMonth).sum(dim='Time', keep_attrs=True) + dsWeightedSum.compute() + + weights = ds_to_weights(ds) + weights.compute() + + weightSum = (weights * ds.daysInMonth).sum(dim='Time') + weightSum.compute() + + timeMean = dsWeightedSum / weightSum.where(weightSum > 0.) + timeMean.compute() + else: + days = ds.daysInMonth.sum(dim='Time') + days.compute() + + dsWeightedSum = (ds * ds.daysInMonth).sum(dim='Time', keep_attrs=True) + dsWeightedSum.compute() + + timeMean = dsWeightedSum / days.where(days > 0.) + timeMean.compute() + + return timeMean + + +def _get_comparison_lat_lon(comparisonLatRes, comparisonLonRes): + ''' + Returns the lat and lon arrays defining the corners of the comparison + grid. 
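For concreteness, with the default 0.5 degree comparison resolution and the domain bounds defined in `constants.py`, the corner arrays produced by `_get_comparison_lat_lon` reduce to two `linspace` calls:

```python
import numpy

# same arithmetic as _get_comparison_lat_lon with the default 0.5 degree
# resolution and constants.latmin/latmax = -90/90, lonmin/lonmax = -180/180
comparisonLatRes = comparisonLonRes = 0.5
nLat = int((90. - (-90.)) / comparisonLatRes) + 1    # 361 points
nLon = int((180. - (-180.)) / comparisonLonRes) + 1  # 721 points
lat = numpy.linspace(-90., 90., nLat)
lon = numpy.linspace(-180., 180., nLon)
```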
+ + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + ''' + nLat = int((constants.latmax-constants.latmin)/comparisonLatRes)+1 + nLon = int((constants.lonmax-constants.lonmin)/comparisonLonRes)+1 + lat = numpy.linspace(constants.latmin, constants.latmax, nLat) + lon = numpy.linspace(constants.lonmin, constants.lonmax, nLon) + + return (lat, lon) + + +def _get_grid_name(gridFileName, latVarName, lonVarName, comparisonLatRes, + comparisonLonRes): + ''' + Given a grid file with given lat and lon variable names, finds the + resolution of the grid and generates a grid name. Given comparison + lat and lon resolution, determines if the grid matches the comparison grid. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + ''' + inFile = netCDF4.Dataset(gridFileName, 'r') + + # Get info from input file + inLat = numpy.array(inFile.variables[latVarName][:], float) + inLon = numpy.array(inFile.variables[lonVarName][:], float) + inDLat = inLat[1]-inLat[0] + inDLon = inLon[1]-inLon[0] + if 'degree' in inFile.variables[latVarName].units: + inUnits = 'degree' + else: + inUnits = 'radian' + inFile.close() + gridName = '{}x{}{}'.format(abs(inDLat), abs(inDLon), inUnits) + + matchesComparison = ((inUnits == 'degree') and + (comparisonLatRes == inDLat) and + (comparisonLonRes == inDLon) and + (inLat[0]-0.5*inDLat == constants.latmin) and + (inLon[0]-0.5*inDLon == constants.lonmin)) + + return (gridName, matchesComparison) + + +def _setup_climatology_caching(ds, startYearClimo, endYearClimo, + yearsPerCacheFile, cachePrefix, + monthValues): # {{{ + ''' + Determine which cache files already exist, which are incomplete and which + years are present in each cache file (whether existing or to be created). + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + ''' + + cacheInfo = [] + + cacheIndices = -1*numpy.ones(ds.dims['Time'], int) + monthsInDs = ds.month.values + yearsInDs = ds.year.values + + # figure out which files to load and which years go in each file + for firstYear in range(startYearClimo, endYearClimo+1, yearsPerCacheFile): + years = range(firstYear, firstYear+yearsPerCacheFile) + + if yearsPerCacheFile == 1: + yearString = '{:04d}'.format(years[0]) + outputFileClimo = '{}_year{}.nc'.format(cachePrefix, yearString) + else: + yearString = '{:04d}-{:04d}'.format(years[0], years[-1]) + outputFileClimo = '{}_years{}.nc'.format(cachePrefix, yearString) + + done = False + if os.path.exists(outputFileClimo): + # already cached + dsCached = None + try: + dsCached = xr.open_dataset(outputFileClimo) + except IOError: + # assuming the cache file is corrupt, so deleting it. 
+ message = 'Deleting cache file {}, which appears to have ' \ + 'been corrupted.'.format(outputFileClimo) + warnings.warn(message) + os.remove(outputFileClimo) + + monthsIfDone = len(monthValues)*len(years) + if ((dsCached is not None) and + (dsCached.attrs['totalMonths'] == monthsIfDone)): + # also complete, so we can move on + done = True + if dsCached is not None: + dsCached.close() + + cacheIndex = len(cacheInfo) + for year in years: + for month in monthValues: + mask = numpy.logical_and(yearsInDs == year, + monthsInDs == month) + cacheIndices[mask] = cacheIndex + + if numpy.count_nonzero(cacheIndices == cacheIndex) == 0: + continue + + cacheInfo.append((outputFileClimo, done, yearString)) + + ds = ds.copy() + ds.coords['cacheIndices'] = ('Time', cacheIndices) + + return cacheInfo, cacheIndices # }}} + + +def _cache_individual_climatologies(ds, cacheInfo, printProgress, + yearsPerCacheFile, monthValues, + calendar): # {{{ + ''' + Cache individual climatologies for later aggregation. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + ''' + + for cacheIndex, info in enumerate(cacheInfo): + outputFileClimo, done, yearString = info + if done: + continue + dsYear = ds.where(ds.cacheIndices == cacheIndex, drop=True) + + if printProgress: + print ' {}'.format(yearString) + + totalDays = dsYear.daysInMonth.sum(dim='Time').values + + monthCount = dsYear.dims['Time'] + + climatology = compute_climatology(dsYear, monthValues, calendar, + maskVaries=False) + + climatology.attrs['totalDays'] = totalDays + climatology.attrs['totalMonths'] = monthCount + + climatology.to_netcdf(outputFileClimo) + + # }}} + + +def _cache_aggregated_climatology(startYearClimo, endYearClimo, cachePrefix, + printProgress, monthValues, + cacheInfo): # {{{ + ''' + Cache aggregated climatology from individual climatologies. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + ''' + + if startYearClimo == endYearClimo: + yearString = '{:04d}'.format(startYearClimo) + outputFileClimo = '{}_year{}.nc'.format(cachePrefix, yearString) + else: + yearString = '{:04d}-{:04d}'.format(startYearClimo, endYearClimo) + outputFileClimo = '{}_years{}.nc'.format(cachePrefix, yearString) + + done = False + if os.path.exists(outputFileClimo): + # already cached + climatology = None + try: + climatology = xr.open_dataset(outputFileClimo) + except IOError: + # assuming the cache file is corrupt, so deleting it. 
+ message = 'Deleting cache file {}, which appears to have ' \ + 'been corrupted.'.format(outputFileClimo) + warnings.warn(message) + os.remove(outputFileClimo) + + monthsIfDone = (endYearClimo-startYearClimo+1)*len(monthValues) + if ((climatology is not None) and + (climatology.attrs['totalMonths'] == monthsIfDone)): + # also complete, so we can move on + done = True + + if not done: + if printProgress: + print ' Computing aggregated climatology ' \ + '{}...'.format(yearString) + + first = True + for cacheIndex, info in enumerate(cacheInfo): + inFileClimo = info[0] + ds = xr.open_dataset(inFileClimo) + days = ds.attrs['totalDays'] + months = ds.attrs['totalMonths'] + if first: + totalDays = days + totalMonths = months + climatology = ds * days + first = False + else: + totalDays += days + totalMonths += months + climatology = climatology + ds * days + + climatology = climatology / totalDays + + climatology.attrs['totalDays'] = totalDays + climatology.attrs['totalMonths'] = totalMonths + + climatology.to_netcdf(outputFileClimo) + + return climatology # }}} + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/constants/constants.py b/mpas_analysis/shared/constants/constants.py index 1e1a72f6d..7df2b1bba 100644 --- a/mpas_analysis/shared/constants/constants.py +++ b/mpas_analysis/shared/constants/constants.py @@ -1,25 +1,59 @@ import numpy as np """ - Constants that are common to all ocean model vs observations analysis +Constants that are common to all analysis tasks - Luke Van Roekel - 10/21/2016 +Authors +------- +Luke Van Roekel, Xylar Asay-Davis, Milena Veneziani +Last modified +------------- +03/15/2017 """ -#set parameters for interpolated grid -dLongitude = 1. -dLatitude = 1. +# set parameters for default climatology comparison grid +dLongitude = 0.5 +dLatitude = 0.5 lonmin = -180. lonmax = 180. latmin = -90. latmax = 90. -monthdictionary={'Jan':1, 'Feb':2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6, 'Jul':7, 'Aug':8, 'Sep':9, 'Oct':10, - 'Nov':11, 'Dec':12, 'JFM':np.array([1,2,3]), 'AMJ':np.array([4,5,6]), 'JAS':np.array([7,8,9]), - 'OND':np.array([10,11,12]), 'ANN':np.arange(1,13)} +monthsInYear = 12 -dinmonth = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]) +monthDictionary = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, + 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, + 'Dec': 12, 'JFM': np.array([1, 2, 3]), + 'AMJ': np.array([4, 5, 6]), 'JAS': np.array([7, 8, 9]), + 'OND': np.array([10, 11, 12]), 'ANN': np.arange(1, 13), + 'ON': np.array([10, 11]), 'FM': np.array([2, 3]), + 'DJF': np.array([12, 1, 2]), 'JJA': np.array([6, 7, 8])} +daysInMonth = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]) +abrevMonthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", + "Sep", "Oct", "Nov", "Dec"] + +# conversion factor from m^3/s to Sverdrups +m3ps_to_Sv = 1e-6 + +# conversion factor from radians to degrees +rad_to_deg = 180./np.pi + +# conversion factor from degrees to radians +deg_to_rad = np.pi/180. + +# seconds in a year +sec_per_year = 86400. * 365. + +# seconds per month (approximate) +sec_per_month = 86400. * 30. 
+ +# Tapering coefficient for calculating spectral degrees of freedom +tapcoef = 1.055111111111111 + +# small value to prevent division by zero +eps = 1.E-10 + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/generalized_reader/__init__.py b/mpas_analysis/shared/generalized_reader/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/mpas_analysis/shared/generalized_reader/generalized_reader.py b/mpas_analysis/shared/generalized_reader/generalized_reader.py new file mode 100644 index 000000000..d45c31150 --- /dev/null +++ b/mpas_analysis/shared/generalized_reader/generalized_reader.py @@ -0,0 +1,416 @@ +""" +Utility functions for importing MPAS files into xarray. These functions extend +the capabilities of mpas_xarray to include mapping variable names from MPAS +names to MPAS-Analysis generalized names and support for slicing to given +start and end dates. + +open_multifile_dataset : opens a data set, maps variable names, preprocess + the data set removes repeated time indices, and slices the time coordinate + to lie between desired start and end dates. + +Authors +------- +Xylar Asay-Davis + +Last modified +------------- +02/23/2017 +""" + +import xarray +from functools import partial +import resource + +from ..mpas_xarray import mpas_xarray +from ..timekeeping.utility import string_to_days_since_date + + +def open_multifile_dataset(fileNames, calendar, config, + simulationStartTime=None, + timeVariableName='Time', + variableList=None, selValues=None, + iselValues=None, variableMap=None, + startDate=None, endDate=None, + chunking=None): # {{{ + """ + Opens and returns an xarray data set given file name(s) and the MPAS + calendar name. + + Parameters + ---------- + fileNames : list of strings + A lsit of file paths to read + + calendar : {'gregorian', 'gregorian_noleap'}, optional + The name of one of the calendars supported by MPAS cores + + config : instance of MpasAnalysisConfigParser + Contains configuration options + + simulationStartTime : string, optional + The start date of the simulation, used to convert from time variables + expressed as days since the start of the simulation to days since the + reference date. `simulationStartTime` takes one of the following + forms: + 0001-01-01 + + 0001-01-01 00:00:00 + + simulationStartTime is only required if the MPAS time variable + (identified by time_variable_name) is a number of days since the + start of the simulation. + + timeVariableName : string, optional + The name of the time variable (typically 'Time' if using a variableMap + or 'xtime' if not using a variableMap) + + variableList : list of strings, optional + If present, a list of variables to be included in the data set + + selValues : dict, optional + A dictionary of coordinate names (keys) and values or arrays of + values used to slice the variales in the data set. See + xarray.DataSet.sel() for details on how this dictonary is used. + An example: + selectCorrdValues = {'cellLon': 180.0} + + iselValues : dict, optional + A dictionary of coordinate names (keys) and indices, slices or + arrays of indices used to slice the variales in the data set. See + xarray.DataSet.isel() for details on how this dictonary is used. 
+ An example: + iselValues = {'nVertLevels': slice(0, 3), + 'nCells': cellIDs} + + variableMap : dict, optional + A dictionary with keys that are variable names used by + MPAS-Analysis and values that are lists of possible names for the same + variable in the MPAS dycore that produced the data set (which may + differ between versions). + + startDate, endDate : string or datetime.datetime, optional + If present, the first and last dates to be used in the data set. The + time variable is sliced to only include dates within this range. + + chunking : None, int, True, dict, optional + If integer is present, applies maximum chunk size from config file + value ``maxChunkSize``, otherwise if None do not perform chunking. If + True, use automated chunking using default config value + ``maxChunkSize``. If chunking is a dict use dictionary values for + chunking. + + Returns + ------- + ds : ``xarray.Dataset`` + + Raises + ------ + TypeError + If the time variable has an unsupported type (not a date string, + a floating-pont number of days since the start of the simulation + or a numpy.datatime64 object). + + ValueError + If the time variable is not found in the data set or if the time + variable is a number of days since the start of the simulation but + simulationStartTime is None. + + Author + ------ + Xylar Asay-Davis, Phillip J. Wolfram + + Last modified + ------------- + 04/06/2017 + """ + + + preprocess_partial = partial(_preprocess, + calendar=calendar, + simulationStartTime=simulationStartTime, + timeVariableName=timeVariableName, + variableList=variableList, + selValues=selValues, + iselValues=iselValues, + variableMap=variableMap, + startDate=startDate, + endDate=endDate) + + kwargs = {'decode_times': False, + 'concat_dim': 'Time'} + + autocloseFileLimitFraction = config.getfloat('input', + 'autocloseFileLimitFraction') + + # get the number of files that can be open at the same time. We want the + # "soft" limit because we'll get a crash if we exceed it. + softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0] + + # use autoclose if we will use more than autocloseFileLimitFraction (50% + # by default) of the soft limit of open files + autoclose = len(fileNames) > softLimit*autocloseFileLimitFraction + + try: + ds = xarray.open_mfdataset(fileNames, + preprocess=preprocess_partial, + autoclose=autoclose, **kwargs) + except TypeError as e: + if 'autoclose' in str(e): + if autoclose: + # This indicates that xarray version doesn't support autoclose + print 'Warning: open_multifile_dataset is trying to use autoclose=True but\n' \ + 'it appears your xarray version doesn\'t support this argument. Will\n' \ + 'try again without autoclose argument.' 
+ + ds = xarray.open_mfdataset(fileNames, + preprocess=preprocess_partial, + **kwargs) + else: + raise e + + ds = mpas_xarray.remove_repeated_time_index(ds) + + if startDate is not None and endDate is not None: + if isinstance(startDate, str): + startDate = string_to_days_since_date(dateString=startDate, + calendar=calendar) + if isinstance(endDate, str): + endDate = string_to_days_since_date(dateString=endDate, + calendar=calendar) + + # select only the data in the specified range of dates + ds = ds.sel(Time=slice(startDate, endDate)) + + # process chunking + if chunking is True: + # limit chunk size to prevent memory error + chunking = config.getint('input', 'maxChunkSize') + + ds = mpas_xarray.process_chunking(ds, chunking) + + # private record of autoclose use + ds.attrs['_autoclose'] = int(autoclose) + + return ds # }}} + + +def _preprocess(ds, calendar, simulationStartTime, timeVariableName, + variableList, selValues, iselValues, variableMap, + startDate, endDate): # {{{ + """ + Performs variable remapping, then calls mpas_xarray.preprocess, to + perform the remainder of preprocessing. + + Parameters + ---------- + ds : xarray.DataSet object + The data set containing an MPAS time variable to be used to build + an xarray time coordinate and with variable names to be + substituted. + + calendar : {'gregorian', 'gregorian_noleap'} + The name of one of the calendars supported by MPAS cores + + The name of the time variable (typically 'Time' if using a variableMap + or 'xtime' if not using a variableMap) + + simulationStartTime : string + The start date of the simulation, used to convert from time variables + expressed as days since the start of the simulation to days since the + reference date. `simulationStartTime` takes one of the following + forms: + 0001-01-01 + + 0001-01-01 00:00:00 + + simulationStartTime is only required if the MPAS time variable + (identified by time_variable_name) is a number of days since the + start of the simulation. + + timeVariableName : string + The name of the time variable (typically 'Time' if using a variable_map + or 'xtime' if not using a variable_map) + + variableList : list of strings + If present, a list of variables to be included in the data set + + selValues : dict + A dictionary of coordinate names (keys) and values or arrays of + values used to slice the variales in the data set. See + xarray.DataSet.sel() for details on how this dictonary is used. + An example: + selectCorrdValues = {'cellLon': 180.0} + + iselValues : dict + A dictionary of coordinate names (keys) and indices, slices or + arrays of indices used to slice the variales in the data set. See + xarray.DataSet.isel() for details on how this dictonary is used. + An example: + iselValues = {'nVertLevels': slice(0, 3), + 'nCells': cellIDs} + + variableMap : dict + A dictionary with keys that are variable names used by + MPAS-Analysis and values that are lists of possible names for the same + variable in the MPAS dycore that produced the data set (which may + differ between versions). + + startDate, endDate : string or datetime.datetime + If present, the first and last dates to be used in the data set. The + time variable is sliced to only include dates within this range. + + Returns + ------- + ds : xarray.DataSet object + A copy of the data set with the time coordinate set and which + has been sliced. + + Authors + ------- + Xylar Asay-Davis, Phillip J. 
Wolfram + + Last modified + ------------- + 04/06/2017 + """ + + submap = variableMap + + # time_variable_names is a special case so we take it out of the map + # and handle it manually (adding a new variable rather than renaming + # an existing one) + if variableMap is not None and timeVariableName in variableMap: + # make a copy of variableMap and remove timeVariableName + submap = variableMap.copy() + submap.pop(timeVariableName, None) + # temporarily change the time variable name + timeVariableName = \ + _map_variable_name(timeVariableName, + ds, + variableMap) + + if submap is not None: + ds = _rename_variables(ds, submap) + + # now that the variables are mapped, do the normal preprocessing in + # mpas_xarray + ds = mpas_xarray.preprocess(ds, + calendar=calendar, + simulationStartTime=simulationStartTime, + timeVariableName=timeVariableName, + variableList=variableList, + selValues=selValues, + iselValues=iselValues) + + return ds # }}} + + +def _map_variable_name(variableName, ds, variableMap): # {{{ + """ + Given a `variableName` in a `variableMap` and an xarray `ds`, + return the name of the the first variable in `variableMap[variableName]` + that is found in ds. + + variableMap is a dictionary with keys that are variable names used by + MPAS-Analysis and values that are lists of possible names for the same + variable in the MPAS dycore that produced the data set (which may differ + between versions). + + Parameters + ---------- + variableName : string + Name of a variable in `varriableMap` + + ds : `xarray.DataSet` object + A data set in which the mapped variable name should be found + + variableMap : dict + A dictionary with keys that are variable names used by + MPAS-Analysis and values that are lists of possible names for the same + variable in the MPAS dycore that produced the data set (which may + differ between versions). + + Returns + ------- + mappedVariableName : The corresponding variable name to `variableName` + found in `ds`. + + Raises + ------ + ValueError + If none of the possible variable names in `variableMap[variableName]` + can be found in `ds`. + + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/08/2017 + """ + possibleVariables = variableMap[variableName] + for variable in possibleVariables: + if isinstance(variable, (list, tuple)): + allFound = True + for subvariable in variable: + if subvariable not in ds.data_vars.keys(): + allFound = False + break + if allFound: + return variable + + elif variable in ds.data_vars.keys(): + return variable + + raise ValueError('Variable {} could not be mapped. None of the ' + 'possible mapping variables {}\n match any of the ' + 'variables in {}.'.format( + variableName, possibleVariables, + ds.data_vars.keys())) + # }}} + + +def _rename_variables(ds, variableMap): # {{{ + """ + Given an `xarray.DataSet` object `ds` and a dictionary mapping + variable names `variableMap`, returns a new data set in which variables + from `ds` with names equal to values in `variableMap` are renamed + to the corresponding key in `variableMap`. + + Parameters + ---------- + ds : `xarray.DataSet` object + A data set in which the mapped variable names should be renamed + + variableMap : dict + A dictionary with keys that are variable names used by + MPAS-Analysis and values that are lists of possible names for the same + variable in the MPAS dycore that produced the data set (which may + differ between versions). + + Returns + ------- + outDataSEt : A new `xarray.DataSet` object with the variable renamed. 
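To make the variable-map machinery above concrete, here is a sketch of a map and of the resulting call to `open_multifile_dataset`. Only the structure (MPAS-Analysis names as keys, lists of candidate dycore names as values, with list/tuple entries meaning that all sub-names must be present) and the call signature come from the code above; every variable, file and date below is invented for illustration.

```python
# Hypothetical map: keys are the names MPAS-Analysis uses, values are lists of
# names the same field might have in different MPAS dycore versions.
variableMap = {
    'mySST': ['timeMonthly_avg_sst', 'time_avg_sst'],
    'Time': [['xtime_startMonthly', 'xtime_endMonthly'],
             ['xtime_start', 'xtime_end'],
             'xtime']}

# `fileNames`, `calendar` and `config` are assumed to be defined elsewhere.
ds = open_multifile_dataset(fileNames, calendar, config,
                            timeVariableName='Time',
                            variableList=['mySST'],
                            variableMap=variableMap,
                            startDate='0001-01-01',
                            endDate='0010-12-31')
```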
+ + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/08/2017 + """ + + renameDict = {} + for datasetVariable in ds.data_vars: + for mapVariable in variableMap: + renameList = variableMap[mapVariable] + if datasetVariable in renameList: + renameDict[datasetVariable] = mapVariable + break + + return ds.rename(renameDict) # }}} + + +# vim: ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/interpolation/interpolate.py b/mpas_analysis/shared/interpolation/interpolate.py index 5811505c0..7a7644d3a 100644 --- a/mpas_analysis/shared/interpolation/interpolate.py +++ b/mpas_analysis/shared/interpolation/interpolate.py @@ -1,68 +1,234 @@ """ - Module that provides basic nearest neighbor interpolation functionality +Functions for performing interpolation - Author: Luke Van Roekel - Modified: 10/24/2016 +Functions +--------- +build_remap_weights - constructs a mapping file containing the indices and + weights needed to perform horizontal interpolation + +remap - perform horizontal interpolation on a data sets, given a mapping file + +Author +------ +Xylar Asay-Davis + +Last Modified +------------- +03/14/2017 """ -import numpy as np -from scipy.spatial import cKDTree -import sys +import subprocess +import tempfile +import os +from distutils.spawn import find_executable + +from .scrip import mpas_file_to_scrip, lat_lon_file_to_scrip, \ + lat_lon_array_to_scrip -def lon_lat_to_cartesian(lon, lat, R = 6371222.): + +def build_remap_weights(sourceFileName, outWeightFileName, + destintionFileName=None, sourceFileType='mpas', + sourceLatVarName='lat', sourceLonVarName='lon', + destintionLatVarName='lat', destintionLonVarName='lon', + destinationLat=None, destinationLon=None, + desitnationUnits='degrees', + method='bilinear', overwrite=False): # {{{ """ - calculates lon, lat coordinates of a point on a sphere with - radius R + Given a source file defining either an MPAS mesh or a lat-lon grid and + a destination file or set of arrays defining a lat-lon grid, constructs + a mapping file used for interpolation between the source and destination + grids. + + Parameters + ---------- + sourceFileName : str + The path of the file containing either the source MPAS mesh or + the source lat-lon grid + + outWeightFileName : str + The path to which the mapping file containing interpolation weights + and indices should be written + + destintionFileName : str, optional + The path of the file containing the destination lat-lon grid. Should + be None if `destinationLat` and `destinationLon` are supplied instead. + + sourceFileType : {'mpas', 'latlon'} + Whether the source file contains an MPAS mesh or a lat-lon grid + + sourceLatVarName, sourceLonVarName : str, optional + If `sourceFileType == 'latlon'`, the name of the latitude and longitude + variables in the source grid file + + destintionLatVarName, destintionLonVarName : str, optional + If `destintionFileName` is not `None`, the name of the latitude and + longitude variables in the source grid file + + destinationLat, destinationLon : 1D numpy.arrays, optional + One dimensional arrays defining the latitude and longitude coordinates + of grid corners on the destination grid. `destintionFileName` should be + set to `None` if these are supplied + + desitnationUnits : {'degrees', 'radians'}, optional + The units of `destinationLat` and `destinationLon` (if they are + supplied) + + method : {'bilinear', 'neareststod', 'conserve'} + The method of interpolation used, see documentation for + `ESMF_RegridWeightGen` for details. 
+ + overwrite : bool, optional + Whether the mapping file should be overwritten if it already exists. + If `False`, and the mapping file is already present, the function + does nothing and returns immediately, potentially saving a costly + re-computaiton of the mapping file. + + Raises + ------ + OSError + If `ESMF_RegridWeightGen` is not in the system path. + + Author + ------ + Xylar Asay-Davis + + Last Modified + ------------- + 03/14/2017 """ - if max(abs(lon)) < 3.0*np.pi: - lon_r = lon - lat_r = lat + if not overwrite and os.path.exists(outWeightFileName): + # a valid weight file already exists, so nothing to do + return + + if find_executable('ESMF_RegridWeightGen') is None: + raise OSError('ESMF_RegridWeightGen not found. Make sure esmf package ' + 'is installed via\nlatest nco: \n' + 'conda install nco\n' + 'Note: this presumes use of the conda-forge channel.') + + # two temporary SCRIP files, one for the MPAS mesh and one for the dest + # grid + sourceScripFileName = _get_temp_path() + destintionScripFileName = _get_temp_path() + + args = ['ESMF_RegridWeightGen', '--source', sourceScripFileName, + '--destination', destintionScripFileName, + '--weight', outWeightFileName, + '--method', method] + + if sourceFileType == 'mpas': + mpas_file_to_scrip(mpasFileName=sourceFileName, + scripFileName=sourceScripFileName) + args.extend(['--src_regional', '--ignore_unmapped']) + elif sourceFileType == 'latlon': + lat_lon_file_to_scrip(inFileName=sourceFileName, + scripFileName=sourceScripFileName, + latVarName=sourceLatVarName, + lonVarName=sourceLonVarName) + else: + raise ValueError("sourceFileType is neither 'mpas' or 'latlon'.") + + if destintionFileName is not None: + lat_lon_file_to_scrip(inFileName=destintionFileName, + scripFileName=destintionScripFileName, + latVarName=destintionLatVarName, + lonVarName=destintionLonVarName) + elif destinationLat is not None and destinationLon is not None: + lat_lon_array_to_scrip(latCorner=destinationLat, + lonCorner=destinationLon, + units=desitnationUnits, + scripFileName=destintionScripFileName) else: - lon_r = np.radians(lon) - lat_r = np.radians(lat) + raise ValueError('Either destintionFileName or both config and ' + 'sectionName must be supplied.') - x = R * np.cos(lat_r) * np.cos(lon_r) - y = R * np.cos(lat_r) * np.sin(lon_r) - z = R * np.sin(lat_r) - return x,y,z + subprocess.check_call(args) -def init_tree(lon_input, lat_input, lonmin, lonmax, latmin, latmax, dLon, dLat): - """ - Initializes a KD tree for nearest neighbor searching - """ - - lon_input = lon_input.flatten() - lat_input = lat_input.flatten() + # remove the temporary SCRIP files + os.remove(sourceScripFileName) + os.remove(destintionScripFileName) # }}} - if max(lon_input) < 2.*np.pi: - lon_input = np.rad2deg(lon_input) - if max(lat_input) < np.pi / 2.: - lat_input = np.rad2deg(lat_input) - if max(lon_input) > 180.: - inds = np.where(lon_input > 180.) - lon_input[inds] -= 360. +def remap(inFileName, outFileName, inWeightFileName, sourceFileType='mpas', + sourceLatVarName='lat', sourceLonVarName='lon', + variableList=None, overwrite=False): # {{{ + """ + Given a source file defining either an MPAS mesh or a lat-lon grid and + a destination file or set of arrays defining a lat-lon grid, constructs + a mapping file used for interpolation between the source and destination + grids. 
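A sketch of the two-step interpolation workflow these functions provide: generate mapping weights once with `build_remap_weights`, then apply them with `remap`. The file names are hypothetical, and both steps require the external tools noted in the docstrings (`ESMF_RegridWeightGen` and `ncremap`).

```python
import numpy

# corner coordinates of a 0.5 degree destination grid (illustrative)
latCorner = numpy.linspace(-90., 90., 361)
lonCorner = numpy.linspace(-180., 180., 721)

# one-time weight generation; with overwrite=False an existing mapping file
# is reused rather than recomputed
build_remap_weights(sourceFileName='oEC60to30_mesh.nc',
                    outWeightFileName='map_mpas_to_0.5x0.5degree.nc',
                    sourceFileType='mpas',
                    destinationLat=latCorner,
                    destinationLon=lonCorner,
                    method='bilinear')

# apply the weights to a data set on the MPAS mesh
remap(inFileName='mpaso_climatology.nc',
      outFileName='mpaso_climatology_0.5x0.5degree.nc',
      inWeightFileName='map_mpas_to_0.5x0.5degree.nc',
      sourceFileType='mpas')
```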
+ + Parameters + ---------- + inFileName : str + The path to the file containing a data set on the source grid + + outFileName : str + The path where the data on the destination grid should be written + + inWeightFileName : str + The path to the mapping file containing interpolation weights + and indices between the source and destination grids + + sourceFileType : {'mpas', 'latlon'} + Whether the source file contains an MPAS mesh or a lat-lon grid + + sourceLatVarName, sourceLonVarName : str, optional + If `sourceFileType == 'latlon'`, the name of the latitude and longitude + variables in the source grid file + + variableList : list of str, optional + A list of variables to be mapped. By default, all variables are mapped + + overwrite : bool, optional + Whether the destination file should be overwritten if it already + exists. If `False`, and the destination file is already present, the + function does nothing and returns immediately + + Raises + ------ + OSError + If `ncremap` is not in the system path. + + Author + ------ + Xylar Asay-Davis + + Last Modified + ------------- + 03/14/2017 + """ - if lonmax > 180.: - sys.exit("longitude bounds must be between -180 and 180") + if not overwrite and os.path.exists(outFileName): + # a valid weight file already exists, so nothing to do + return - xs, ys, zs = lon_lat_to_cartesian(lon_input,lat_input) - tree = cKDTree(zip(xs, ys, zs)) + if find_executable('ncremap') is None: + raise OSError('ncremap not found. Make sure the latest nco package ' + 'is installed: \n conda install nco') - lonVals = np.arange(lonmin + dLon/2., lonmax + dLon/2., dLon) - latVals = np.arange(latmin + dLat/2., latmax + dLat/2., dLat) + args = ['ncremap', + '-R', '--rgr lat_nm={} --rgr lon_nm={}'.format(sourceLatVarName, + sourceLonVarName), + '-i', inFileName, + '-m', inWeightFileName, + '-o', outFileName] - latTarg, lonTarg = np.meshgrid(latVals,lonVals) - xt, yt, zt = lon_lat_to_cartesian(lonTarg.flatten(),latTarg.flatten()) + if sourceFileType == 'mpas': + # Note: using the -C (climatology) flag for now because otherwise + # ncremap tries to add a _FillValue attribute that might already + # be present and quits with an error + args.extend(['-P', 'mpas', '-C']) + if variableList is not None: + args.extend(['-v', ','.join(variableList)]) - d, inds = tree.query(zip(xt, yt, zt), k = 1) + subprocess.check_call(args) # }}} - return d, inds, lonTarg, latTarg -def interp_fields(field, d, inds, lonTarg): - """ - performs nearest neighbor interpolation - """ +def _get_temp_path(): # {{{ + '''Returns the name of a temporary NetCDF file''' + return '{}/{}.nc'.format(tempfile._get_default_tempdir(), + next(tempfile._get_candidate_names())) # }}} - return field.flatten()[inds].reshape(lonTarg.shape) +# vim: ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/interpolation/scrip.py b/mpas_analysis/shared/interpolation/scrip.py new file mode 100644 index 000000000..a114c2ea0 --- /dev/null +++ b/mpas_analysis/shared/interpolation/scrip.py @@ -0,0 +1,311 @@ +''' +Functions for creating SCRIP files, used to create mapping files + +Functions +--------- +mpas_file_to_scrip - create a SCRIP file for an MPAS mesh + +lat_lon_file_to_scrip - create a SCRIP file based on a lat-lon grid from a file + +lat_lon_array_to_scrip - create a SCRIP file based on lat and lon arrays + +Author +------ +Xylar Asay-Davis + +Last Modified +------------- +02/23/2017 +''' + +import netCDF4 +import numpy +import sys + + +def mpas_file_to_scrip(mpasFileName, scripFileName): # {{{ + ''' + Given an 
MPAS mesh file, create a SCRIP file based on the mesh. + + Parameters + ---------- + mpasFileName : str + The path of the file containing the source MPAS mesh + + scripFileName : str + The path to which the SCRIP file should be written + + Authors + ------ + Xylar Asay-Davis, Matthew Hoffman, Douglas Jacobsen + + Last Modified + ------------- + 02/20/2017 + ''' + inFile = netCDF4.Dataset(mpasFileName, 'r') + outFile = netCDF4.Dataset(scripFileName, 'w') + + # Get info from input file + latCell = inFile.variables['latCell'][:] + lonCell = inFile.variables['lonCell'][:] + latVertex = inFile.variables['latVertex'][:] + lonVertex = inFile.variables['lonVertex'][:] + verticesOnCell = inFile.variables['verticesOnCell'][:] + nEdgesOnCell = inFile.variables['nEdgesOnCell'][:] + nCells = len(inFile.dimensions['nCells']) + maxVertices = len(inFile.dimensions['maxEdges']) + areaCell = inFile.variables['areaCell'][:] + sphereRadius = float(inFile.sphere_radius) + + _create_scrip(outFile, grid_size=nCells, grid_corners=maxVertices, + grid_rank=1, units='radians') + + grid_area = outFile.createVariable('grid_area', 'f8', ('grid_size',)) + grid_area.units = 'radian^2' + # SCRIP uses square radians + grid_area[:] = areaCell[:] / (sphereRadius**2) + + outFile.variables['grid_center_lat'][:] = latCell[:] + outFile.variables['grid_center_lon'][:] = lonCell[:] + outFile.variables['grid_dims'][:] = nCells + outFile.variables['grid_imask'][:] = 1 + + # grid corners: + grid_corner_lon = numpy.zeros((nCells, maxVertices)) + grid_corner_lat = numpy.zeros((nCells, maxVertices)) + for iVertex in range(maxVertices): + cellIndices = numpy.arange(nCells) + # repeat the last vertex wherever iVertex > nEdgesOnCell + localVertexIndices = numpy.minimum(nEdgesOnCell-1, iVertex) + vertexIndices = verticesOnCell[cellIndices, localVertexIndices] - 1 + grid_corner_lat[cellIndices, iVertex] = latVertex[vertexIndices] + grid_corner_lon[cellIndices, iVertex] = lonVertex[vertexIndices] + + outFile.variables['grid_corner_lat'][:] = grid_corner_lat[:] + outFile.variables['grid_corner_lon'][:] = grid_corner_lon[:] + + # Update history attribute of netCDF file + if hasattr(inFile, 'history'): + newhist = '\n'.join([getattr(inFile, 'history'), + ' '.join(sys.argv[:])]) + else: + newhist = sys.argv[:] + setattr(outFile, 'history', newhist) + + inFile.close() + outFile.close() # }}} + + +def lat_lon_file_to_scrip(inFileName, scripFileName, latVarName='lat', + lonVarName='lon'): # {{{ + ''' + Given an MPAS mesh file, create a SCRIP file based on the mesh. 
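A usage sketch for `lat_lon_file_to_scrip`, which builds a SCRIP description from an existing lat-lon grid file so that mapping weights can later be generated from it. The file and variable names below are hypothetical; only the argument names come from the signature above.

```python
# Sketch: describe the grid of a hypothetical 1 degree observational data set
# in SCRIP format
lat_lon_file_to_scrip(inFileName='sst_obs_1.0x1.0degree.nc',
                      scripFileName='sst_obs_1.0x1.0degree_scrip.nc',
                      latVarName='lat',
                      lonVarName='lon')
```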
+ + Parameters + ---------- + inFileName : str + The path of the file containing a lat-lon grid + + scripFileName : str + The path to which the SCRIP file should be written + + latVarName, lonVarName : str, optional + The name of the latitude and longitude variables in the grid file + + Authors + ------ + Xylar Asay-Davis + + Last Modified + ------------- + 02/23/2017 + ''' + def interp_extrap_corner(inField): + outField = numpy.zeros(len(inField)+1) + outField[1:-1] = 0.5*(inField[0:-1] + inField[1:]) + # extrapolate the ends + outField[0] = 1.5*inField[0] - 0.5*inField[1] + outField[-1] = 1.5*inField[-1] - 0.5*inField[-2] + return outField + + inFile = netCDF4.Dataset(inFileName, 'r') + outFile = netCDF4.Dataset(scripFileName, 'w') + + # Get info from input file + lat = numpy.array(inFile.variables[latVarName][:], float) + lon = numpy.array(inFile.variables[lonVarName][:], float) + if 'degree' in inFile.variables[latVarName].units: + units = 'degrees' + else: + units = 'radians' + + # interp/extrap corners + lonCorner = interp_extrap_corner(lon) + latCorner = interp_extrap_corner(lat) + + _write_lat_lon_scrip(outFile, lat, lon, latCorner, lonCorner, units) + + # Update history attribute of netCDF file + if hasattr(inFile, 'history'): + newhist = '\n'.join([getattr(inFile, 'history'), + ' '.join(sys.argv[:])]) + else: + newhist = sys.argv[:] + setattr(outFile, 'history', newhist) + + inFile.close() + outFile.close() # }}} + + +def lat_lon_array_to_scrip(latCorner, lonCorner, scripFileName, + units='degrees'): # {{{ + ''' + Given an MPAS mesh file, create a SCRIP file based on the mesh. + + Parameters + ---------- + latCorner, lonCorner : str + One dimensional arrays defining the latitude and longitude coordinates + of grid corners on the destination grid + + scripFileName : str + The path to which the SCRIP file should be written + + units : {'degrees', 'radians'}, optional + The units of `latCorner` and `lonCorner` + + Authors + ------ + Xylar Asay-Davis + + Last Modified + ------------- + 02/23/2017 + ''' + + lon = 0.5*(lonCorner[0:-1] + lonCorner[1:]) + lat = 0.5*(latCorner[0:-1] + latCorner[1:]) + + outFile = netCDF4.Dataset(scripFileName, 'w') + + _write_lat_lon_scrip(outFile, lat, lon, latCorner, lonCorner, units) + + # Add history attribute to netCDF file + setattr(outFile, 'history', sys.argv[:]) + + outFile.close() # }}} + + +def _create_scrip(outFile, grid_size, grid_corners, grid_rank, units): # {{{ + ''' + Given a SCRIP files, creates common variables and writes common values used + in various types of SCRIP files. 
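The `interp_extrap_corner` helper above turns N cell-center coordinates into N+1 cell-corner coordinates by averaging neighbouring centers and linearly extrapolating at the two ends. A small self-contained check of that arithmetic:

```python
import numpy

# three cell centers of a uniform 0.5 degree grid
centers = numpy.array([0.25, 0.75, 1.25])

corners = numpy.zeros(len(centers) + 1)
corners[1:-1] = 0.5*(centers[0:-1] + centers[1:])   # interior midpoints
corners[0] = 1.5*centers[0] - 0.5*centers[1]        # extrapolate left end
corners[-1] = 1.5*centers[-1] - 0.5*centers[-2]     # extrapolate right end

print(corners)  # approximately [0.0, 0.5, 1.0, 1.5]
```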
+ + Parameters + ---------- + outFile : file pointer + A SCRIP file opened in write mode + + grid_size : int + The number of elements in the grid or mesh + + grid_corners : int + The number of corners in the grid or mesh + + grid_rank : int + The dimensionality of the grid (1 for mesh, 2 for grid) + + units : {'degrees', 'radians'} + The units for latitude and longitude + + Authors + ------ + Xylar Asay-Davis + + Last Modified + ------------- + 02/20/2017 + ''' + # Write to output file + # Dimensions + outFile.createDimension("grid_size", grid_size) + outFile.createDimension("grid_corners", grid_corners) + outFile.createDimension("grid_rank", grid_rank) + + # Variables + grid_center_lat = outFile.createVariable('grid_center_lat', 'f8', + ('grid_size',)) + grid_center_lat.units = units + grid_center_lon = outFile.createVariable('grid_center_lon', 'f8', + ('grid_size',)) + grid_center_lon.units = units + grid_corner_lat = outFile.createVariable('grid_corner_lat', 'f8', + ('grid_size', 'grid_corners')) + grid_corner_lat.units = units + grid_corner_lon = outFile.createVariable('grid_corner_lon', 'f8', + ('grid_size', 'grid_corners')) + grid_corner_lon.units = units + grid_imask = outFile.createVariable('grid_imask', 'i4', ('grid_size',)) + grid_imask.units = 'unitless' + outFile.createVariable('grid_dims', 'i4', ('grid_rank',)) # }}} + + +def _write_lat_lon_scrip(outFile, lat, lon, latCorner, lonCorner, + units): # {{{ + ''' + Given a SCRIP files, creates common variables and writes common values used + in various types of SCRIP files. + + Parameters + ---------- + outFile : file pointer + A SCRIP file opened in write mode + + lat, lon : 1D numpy.array + Latitude and longitude arrays for cell centers on the grid or mesh + + latCorner, lonCorner : 1D numpy.array + Latitude and longitude arrays for cell corners on the grid or mesh + + units : {'degrees', 'radians'} + The units for latitude and longitude + + Authors + ------ + Xylar Asay-Davis + + Last Modified + ------------- + 02/23/2017 + ''' + def unwrap_corners(inField): + outField = numpy.zeros(((inField.shape[0]-1)*(inField.shape[1]-1), 4)) + # corners are counterclockwise + outField[:, 0] = inField[0:-1, 0:-1].flat + outField[:, 1] = inField[0:-1, 1:].flat + outField[:, 2] = inField[1:, 1:].flat + outField[:, 3] = inField[1:, 0:-1].flat + + return outField + + nLat = len(lat) + nLon = len(lon) + + grid_size = nLat*nLon + + _create_scrip(outFile, grid_size=grid_size, grid_corners=4, + grid_rank=2, units=units) + + (Lon, Lat) = numpy.meshgrid(lon, lat) + (LonCorner, LatCorner) = numpy.meshgrid(lonCorner, latCorner) + + outFile.variables['grid_center_lat'][:] = Lat.flat + outFile.variables['grid_center_lon'][:] = Lon.flat + outFile.variables['grid_dims'][:] = [nLon, nLat] + outFile.variables['grid_imask'][:] = 1 + + outFile.variables['grid_corner_lat'][:] = unwrap_corners(LatCorner) + outFile.variables['grid_corner_lon'][:] = unwrap_corners(LonCorner) # }}} + +# vim: ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/io/namelist_streams_interface.py b/mpas_analysis/shared/io/namelist_streams_interface.py index 29d9e9474..a3d67adf6 100644 --- a/mpas_analysis/shared/io/namelist_streams_interface.py +++ b/mpas_analysis/shared/io/namelist_streams_interface.py @@ -1,10 +1,15 @@ #!/usr/bin/env python """ -Module of classes / routines to manipulate fortran namelist and streams +Module of classes/routines to manipulate fortran namelist and streams files. 
+Authors +------- Phillip Wolfram, Xylar Asay-Davis -Last modified: 12/05/2016 + +Last modified +------------- +04/01/2017 """ from lxml import etree @@ -13,7 +18,7 @@ from ..containers import ReadOnlyDict from .utility import paths -from ..timekeeping.Date import Date +from ..timekeeping.utility import string_to_datetime, string_to_relative_delta def convert_namelist_to_dict(fname, readonly=True): @@ -44,8 +49,13 @@ class NameList: Class for fortran manipulation of namelist files, provides read and write functionality + Authors + ------- Phillip Wolfram, Xylar Asay-Davis - Last modified: 11/02/2016 + + Last modified + ------------- + 02/06/2017 """ # constructor @@ -93,6 +103,44 @@ def getbool(self, key): return True else: return False + + def find_option(self, possibleOptions): + """ + If one (or more) of the names in ``possibleOptions`` is an option in + this namelist file, returns the first match. + + Parameters + ---------- + possibleOptions: list of str + A list of options to search for + + Returns + ------- + optionName : str + The name of an option from possibleOptions occurring in the + namelist file + + Raises + ------ + ValueError + If no match is found. + + Authors + ------- + Xylar Asay-Davis + + Last modified + ------------- + 04/01/2017 + """ + + for optionName in possibleOptions: + if optionName in self.nml.keys(): + return optionName + + raise ValueError('None of the possible options {} found in namelist file {}.'.format( + possibleOptions, self.fname)) + # }}} @@ -138,12 +186,48 @@ def read(self, streamname, attribname): return stream.get(attribname) return None - def readpath(self, streamName, startDate=None, endDate=None): + def readpath(self, streamName, startDate=None, endDate=None, + calendar=None): """ - Returns a list of files that match the file template in the - stream streamName with attribute attribName. If the startDate - and/or endDate are supplied, only files on or after the starDate and/or - on or before the endDate are included in the file list. + Given the name of a stream and optionally start and end dates and a + calendar type, returns a list of files that match the file template in + the stream. + + Parameters + ---------- + streamName : string + The name of a stream that produced the files + + startDate, endDate : string or datetime.datetime, optional + String or datetime.datetime objects identifying the beginning + and end dates to be found. + + Note: a buffer of one output interval is subtracted from startDate + and added to endDate because the file date might be the first + or last date contained in the file (or anything in between). + + calendar: {'gregorian', 'gregorian_noleap'}, optional + The name of one of the calendars supported by MPAS cores, and is + required if startDate and/or endDate are supplied + + Returns + ------- + fileList : list + A list of file names produced by the stream that fall between + the startDate and endDate (if supplied) + + Raises + ------ + ValueError + If no files from the stream are found. 
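A sketch of how `readpath` might be used. The streams-file object, stream name and dates below are hypothetical; the argument names and the requirement to pass a calendar when dates are supplied come from the docstring above.

```python
# Sketch: `streams` is an instance of the streams-file reader class defined in
# this module, constructed elsewhere from the run's streams file.
fileNames = streams.readpath('oceanMonthlyOutput',
                             startDate='0001-01-01',
                             endDate='0010-12-31',
                             calendar='gregorian_noleap')
```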
+ + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/04/2017 """ template = self.read(streamName, 'filename_template') if template is None: @@ -168,8 +252,9 @@ def readpath(self, streamName, startDate=None, endDate=None): fileList = paths(path) if len(fileList) == 0: - raise ValueError("Path {} in streams file {} for '{}' not found.".format( - path, self.fname, streamName)) + raise ValueError( + "Path {} in streams file {} for '{}' not found.".format( + path, self.fname, streamName)) if (startDate is None) and (endDate is None): return fileList @@ -178,16 +263,33 @@ def readpath(self, streamName, startDate=None, endDate=None): if output_interval is None: # There's no file interval, so hard to know what to do # let's put a buffer of a year on each side to be safe - offsetDate = Date(dateString='0001-00-00', isInterval=True) + offsetDate = string_to_relative_delta(dateString='0001-00-00', + calendar=calendar) else: - offsetDate = Date(dateString=output_interval, isInterval=True) + offsetDate = string_to_relative_delta(dateString=output_interval, + calendar=calendar) if startDate is not None: # read one extra file before the start date to be on the safe side - startDate = Date(startDate) - offsetDate + if isinstance(startDate, str): + startDate = string_to_datetime(startDate) + try: + startDate -= offsetDate + except (ValueError, OverflowError): + # if the startDate would be out of range after subtracting + # the offset, we'll stick with the starDate as it is + pass + if endDate is not None: # read one extra file after the end date to be on the safe side - endDate = Date(endDate) + offsetDate + if isinstance(endDate, str): + endDate = string_to_datetime(endDate) + try: + endDate += offsetDate + except (ValueError, OverflowError): + # if the endDate would be out of range after adding + # the offset, we'll stick with the endDate as it is + pass # remove any path that's part of the template template = os.path.basename(template) @@ -204,7 +306,7 @@ def readpath(self, streamName, startDate=None, endDate=None): baseName = os.path.basename(fileName) dateEndIndex = len(baseName) - dateEndOffset fileDateString = baseName[dateStartIndex:dateEndIndex] - fileDate = Date(fileDateString) + fileDate = string_to_datetime(fileDateString) add = True if startDate is not None and startDate > fileDate: add = False @@ -231,18 +333,38 @@ def has_stream(self, streamName): def find_stream(self, possibleStreams): """ - If one (or more) of the names in possibleStreams is a stream in this - streams file, returns the first match. If no match is found, raises - a ValueError. - + If one (or more) of the names in ``possibleStreams`` is an stream in + this streams file, returns the first match. + + Parameters + ---------- + possibleStreams: list of str + A list of streams to search for + + Returns + ------- + streamName : str + The name of an stream from possibleOptions occurring in the + streams file + + Raises + ------ + ValueError + If no match is found. 
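`find_option` (on the namelist class) and `find_stream` follow the same pattern: pass a list of candidate names, get back the first one actually present, and a `ValueError` if none is found. A sketch with hypothetical option and stream names:

```python
# Sketch: `namelist` and `streams` are reader objects from this module; the
# option and stream names below are placeholders, not taken from any MPAS run.
optionName = namelist.find_option(['config_my_new_option_name',
                                   'config_my_old_option_name'])

streamName = streams.find_stream(['monthlyAveragedOutput',
                                  'monthlyOutput'])
```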
+ + Authors + ------- Xylar Asay-Davis - Last modified: 12/07/2016 + + Last modified + ------------- + 04/01/2017 """ for streamName in possibleStreams: if self.has_stream(streamName): return streamName - - raise ValueError('Stream {} not found in streams file {}.'.format( - streamName, self.fname)) + + raise ValueError('None of the possible streams {} found in streams file {}.'.format( + possibleStreams, self.fname)) # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/io/utility.py b/mpas_analysis/shared/io/utility.py index 5534cb344..d5fbe2982 100644 --- a/mpas_analysis/shared/io/utility.py +++ b/mpas_analysis/shared/io/utility.py @@ -1,17 +1,19 @@ -#!/usr/bin/env python """ IO utility functions -Phillip J. Wolfram -10/25/2016 +Phillip J. Wolfram, Xylar Asay-Davis + +Last Modified: 03/23/2017 """ import glob +import os + def paths(*args): - """ + """ Returns glob'd paths in list for arbitrary number of function arguments. - Note, each expanded set of paths is sorted. + Note, each expanded set of paths is sorted. Phillip J. Wolfram 10/25/2016 @@ -21,4 +23,69 @@ def paths(*args): paths += sorted(glob.glob(aargs)) return paths + +def make_directories(path): # {{{ + """ + Make the given path if it does not already exist. + + Returns the path unchanged. + + Author: Xylar Asay-Davis + Last Modified: 02/02/2017 + """ + + try: + os.makedirs(path) + except OSError: + pass + return path # }}} + + +def build_config_full_path(config, section, relativePathOption, + relativePathSection=None, + defaultPath=None): + """ + Returns a full path from a base directory and a relative path + + Parameters + ---------- + config : MpasAnalysisConfigParser object + configuration from which to read the path + + section : str + the name of a section in `config`, which must have an option + `baseDirectory` + + relativePathOption : str + the name of an option in `section` of the relative path within + `baseDirectory` (or possibly an absolute path) + + relativePathSection : str, optional + the name of a section for `relativePathOption` if not `section` + + defaultPath : str, optional + the name of a path to return if the resulting path doesn't exist. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 03/23/2017 + """ + if relativePathSection is None: + relativePathSection = section + + subDirectory = config.get(relativePathSection, relativePathOption) + if os.path.isabs(subDirectory): + fullPath = subDirectory + else: + fullPath = '{}/{}'.format(config.get(section, 'baseDirectory'), + subDirectory) + + if defaultPath is not None and not os.path.exists(fullPath): + fullPath = defaultPath + return fullPath + # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/mpas_xarray/mpas_xarray.py b/mpas_analysis/shared/mpas_xarray/mpas_xarray.py index 91712d8dc..53f5e7150 100644 --- a/mpas_analysis/shared/mpas_xarray/mpas_xarray.py +++ b/mpas_analysis/shared/mpas_xarray/mpas_xarray.py @@ -1,61 +1,158 @@ -#!/usr/bin/env python +import numpy as np +import xarray +from functools import partial + +from ..timekeeping.utility import string_to_days_since_date, \ + string_to_datetime, days_to_datetime, datetime_to_days + """ -mpas_xarray.py -============================================================== -Wrapper to handle importing MPAS files into xarray. - - Module: - 1. converts MPAS time in various formats to xarray time. The MPAS time - variable is provided via - `preprocess_mpas(..., timestr='xtime', ...)`. 
- `timestr` can either be a single variable name or a pair of variable - names. In the latter case, each time variable is converted to an - xarray time and the mean of the two times is used as the final xarray - time. Each variable name in `timestr` can refer either to a float - array containing the the number of days since the start of the - simulation (e.g. `daysSinceStartOfSim`) or a string variable with the - date and time (e.g. `xtime`) in the usual MPAS format: - YYYY-MM-DD_hh:mm:ss - 2. provides capability to remove redundant time entries from reading of - multiple netCDF datasets via `remove_repeated_time_index`. - 3. provides capability to build a variable map between MPAS dycore variable - names and those used in mpas_analysis. This aids in supporting multiple - versions of MPAS dycores. The function `map_variable(...)` can be used - to find the associated MPAS dycore variable name in a dataset given a - variable name as used in mpas_analysis. The function - `rename_variables(...)` can be used to rename all variables in a variable - map from their MPAS dycore names to the corresponding mpas_analysis names. - - Example Usage: - ->>> from mpas_xarray import preprocess_mpas, remove_repeated_time_index ->>> ->>> ds = xarray.open_mfdataset('globalStats*nc', preprocess=preprocess_mpas) ->>> ds = remove_repeated_time_index(ds) +Utility functions for importing MPAS files into xarray. + +open_multifile_dataset : open an xarray data set from MPAS data files +subset_variables : Keep only a subset of variables in a dataset +preprocess : preprocess a single file of an xarray dataset +remove_repeated_time_index : remove redundant indices in the 'Time' coordinate +Authors +------- Phillip J. Wolfram, Xylar Asay-Davis -Last modified: 12/07/2016 + +Last modified +------------- +02/22/2017 """ -import datetime -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import xarray as xr +def open_multifile_dataset(fileNames, calendar, + simulationStartTime=None, + timeVariableName='xtime', + variableList=None, selValues=None, + iselValues=None): # {{{ + """ + Opens and returns an xarray data set given file name(s) and the MPAS + calendar name. + + Parameters + ---------- + fileNames : list of strings + A lsit of file paths to read + + calendar : {'gregorian', 'gregorian_noleap'}, optional + The name of one of the calendars supported by MPAS cores + + simulationStartTime : string, optional + The start date of the simulation, used to convert from time variables + expressed as days since the start of the simulation to days since the + reference date. `simulationStartTime` takes one of the following + forms: + 0001-01-01 + + 0001-01-01 00:00:00 + + simulationStartTime is only required if the MPAS time variable + (identified by timeVariableName) is a number of days since the + start of the simulation. + + timeVariableName : string, optional + The name of the time variable (typically 'Time' if using a variableMap + or 'xtime' if not using a variableMap) + + variableList : list of strings, optional + If present, a list of variables to be included in the data set + + selectCorrdValues : dict, optional + A dictionary of coordinate names (keys) and values or arrays of + values used to slice the variales in the data set. See + xarray.dataset.sel() for details on how this dictonary is used. 
+ An example: + selectCorrdValues = {'cellLon': 180.0} + + iselValues : dict, optional + A dictionary of coordinate names (keys) and indices, slices or + arrays of indices used to slice the variales in the data set. See + xarray.dataset.isel() for details on how this dictonary is used. + An example: + iselValues = {'nVertLevels': slice(0, 3), + 'nCells': cellIDs} + + Returns + ------- + ds : ``xarray.Dataset`` + + Raises + ------ + TypeError + If the time variable has an unsupported type (not a date string or + a floating-pont number of days since the start of the simulation). + + ValueError + If the time variable is not found in the data set or if the time + variable is a number of days since the start of the simulation but + simulationStartTime is None. + + Author + ------ + Xylar Asay-Davis -def subset_variables(ds, vlist): # {{{ + Last modified + ------------- + 02/17/2017 """ - Reduces an xarray dataset ds to only contain the variables in vlist. - Phillip J. Wolfram - 01/10/2017 + preprocess_partial = partial(preprocess, + calendar=calendar, + simulationStartTime=simulationStartTime, + timeVariableName=timeVariableName, + variableList=variableList, + selValues=selValues, + iselValues=iselValues) + + ds = xarray.open_mfdataset(fileNames, + preprocess=preprocess_partial, + decode_times=False, concat_dim='Time') + + ds = remove_repeated_time_index(ds) + + return ds # }}} + + +def subset_variables(ds, variableList): # {{{ + """ + Given a data set and a list of variable names, returns a new data set that + contains only variables with those names. + + Parameters + ---------- + ds : xarray.DataSet object + The data set from which a subset of variables is to be extracted. + + variableList : string or list of strings + The names of the variables to be extracted. + + Returns + ------- + ds : xarray.DataSet object + A copy of the original data set with only the variables in + variableList. + + Raises + ------ + ValueError + If the resulting data set is empty. + + Authors + ------- + Phillip J. Wolfram, Xylar Asay-Davis + + Last modified + ------------- + 02/16/2017 """ allvars = ds.data_vars.keys() # get set of variables to drop (all ds variables not in vlist) - dropvars = set(allvars) - set(vlist) + dropvars = set(allvars) - set(variableList) # drop spurious variables ds = ds.drop(dropvars) @@ -69,50 +166,171 @@ def subset_variables(ds, vlist): # {{{ # drop spurious coordinates ds = ds.drop(dropcoords) - assert len(ds.variables.keys()) > 0, 'MPAS_XARRAY ERROR: Empty dataset is returned.\n'\ - 'Variables {}\nare not found within the dataset variables: {}.'\ - .format(vlist, allvars) + if len(ds.data_vars.keys()) == 0: + raise ValueError( + 'Empty dataset is returned.\n' + 'Variables {}\n' + 'are not found within the dataset ' + 'variables: {}.'.format(variableList, allvars)) return ds # }}} -def assert_valid_datetimes(datetimes, yearoffset): # {{{ +def preprocess(ds, calendar, simulationStartTime, timeVariableName, + variableList, selValues, iselValues): # {{{ + """ + Builds correct time specification for MPAS, allowing a date offset + because the time must be between 1678 and 2262 based on the xarray + library. Also, if slicing information (`selValues` and/or + `iselValues`) was provided in `openMultifileDataSet`, this + function performs the appropriate slicing on the data set. + + Parameters + ---------- + ds : xarray.DataSet object + The data set containing an MPAS time variable to be used to build + an xarray time coordinate. 
+ + calendar : {'gregorian', 'gregorian_noleap'} + The name of one of the calendars supported by MPAS cores + + simulationStartTime : string, optinal + The start date of the simulation, used to convert from time + variables expressed as days since the start of the simulation to + days since the reference date. `simulationStartTime` takes one + of the following forms: + 0001-01-01 + + 0001-01-01 00:00:00 + + simulationStartTime is only required if the MPAS time variable + (identified by timeVariableName) is a number of days since the + start of the simulation. + + timeVariableName : string + The name of the time variable (typically 'Time' if using a variableMap + or 'xtime' if not using a variableMap) + + variableList : list of strings + If present, a list of variables to be included in the data set + + selectCorrdValues : dict + A dictionary of coordinate names (keys) and values or arrays of + values used to slice the variales in the data set. See + xarray.DataSet.sel() for details on how this dictonary is used. + An example: + selectCorrdValues = {'cellLon': 180.0} + + iselValues : dict + A dictionary of coordinate names (keys) and indices, slices or + arrays of indices used to slice the variales in the data set. See + xarray.DataSet.isel() for details on how this dictonary is used. + An example: + iselValues = {'nVertLevels': slice(0, 3), + 'nCells': cellIDs} + + Returns + ------- + ds : xarray.DataSet object + A copy of the data set with the time coordinate set and which + has been sliced. + + Authors + ------- + Phillip J. Wolfram, Milena Veneziani, Luke van Roekel + and Xylar Asay-Davis + + Last modified + ------------- + 04/06/2017 """ - Ensure that datatimes are compatable with xarray - Phillip J. Wolfram - 04/20/2016 + ds = _parse_dataset_time(ds=ds, + inTimeVariableName=timeVariableName, + calendar=calendar, + simulationStartTime=simulationStartTime, + outTimeVariableName='Time', + referenceDate='0001-01-01') + + if variableList is not None: + ds = subset_variables(ds, + _ensure_list(variableList)) + + _assert_valid_selections(ds, selValues, + iselValues) + + if selValues is not None: + ds = ds.sel(**selValues) + + if iselValues is not None: + ds = ds.isel(**iselValues) + + return ds # }}} + + +def remove_repeated_time_index(ds): # {{{ """ - assert datetimes[0].year > 1678, \ - 'ERROR: yearoffset={}'.format(yearoffset) + \ - ' must be large enough to ensure datetimes larger than year 1678' - assert datetimes[-1].year < 2262, \ - 'ERROR: yearoffset={}'.format(yearoffset) + \ - ' must be small enough to ensure datetimes smaller than year 2262' + Remove repeated times from xarray dataset. - return # }}} + Parameters + ---------- + ds : xarray.DataSet object + The data set potentially containing repeated time indices. + + Returns + ------- + ds : xarray.DataSet object + A copy of the original data set with any repeated time indices removed. + Authors + ------- + Phillip J. Wolfram, Xylar Asay-Davis -def assert_valid_selections(ds, selvals, iselvals): # {{{ + Last modified + ------------- + 02/11/2017 """ - Ensure that dataset selections are compatable. 
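For reference, the `selValues`/`iselValues` slicing that `preprocess` performs is just xarray's value-based `sel` and index-based `isel`; a minimal, self-contained sketch with a made-up dataset:
```
import numpy as np
import xarray as xr

ds = xr.Dataset({'temperature': (('Time', 'nCells', 'nVertLevels'),
                                 np.zeros((4, 6, 5)))},
                coords={'Time': np.arange(4)*30.})

dsSel = ds.sel(Time=30.)                    # like selValues={'Time': 30.}
dsIsel = ds.isel(nVertLevels=slice(0, 3),   # like iselValues={'nVertLevels': slice(0, 3),
                 nCells=[0, 2, 4])          #                  'nCells': [0, 2, 4]}
```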
+ # get repeated indices + times = ds.Time.values + indices = range(len(times)) + uniqueTimes = set() + remove = [] + for timeIndex, time in enumerate(times): + if time not in uniqueTimes: + uniqueTimes.add(time) + else: + remove.append(timeIndex) + + # remove repeaded indices, working backwards from the last + remove.reverse() + for timeIndex in remove: + indices.pop(timeIndex) - It is possible selVals and iselVals may conflict, e.g., selVals restricts - the dataset to a point where iselvals is unable to be satisfied, hence a - check is needed to make sure that keys in selvals and iselvals are unique. - Additionally, keys for selvals and iselvals are tested to make sure they - are dataset dimensions that can be used for selection. + # remove repeated indices + ds = ds.isel(Time=indices) - Phillip J. Wolfram - Last modified: 12/07/2016 + return ds # }}} + + +def _assert_valid_selections(ds, selvals, iselvals): # {{{ """ + Ensure that dataset selections are compatable. - if (selvals is not None) and (iselvals is not None): - duplicatedkeys = len(np.intersect1d(selvals.keys(), iselvals.keys())) - assert len(duplicatedkeys) == 0, \ - 'Duplicated selection of variables {} was found! ' \ - 'Selection is ambiguous.'.format(duplicatedkeys) + It is possible selVals and iselVals may conflict, e.g., selVals + restricts the dataset to a point where iselvals is unable to be + satisfied, hence a check is needed to make sure that keys in selvals + and iselvals are unique. Additionally, keys for selvals and iselvals + are tested to make sure they are dataset dimensions that can be used + for selection. + Authors + ------- + Phillip J. Wolfram, Xylar Asay-Davis + + Last modified + ------------- + 02/10/2017 + """ def test_vals_in_ds(vals, dims): if vals is not None: for val in vals.keys(): @@ -120,18 +338,30 @@ def test_vals_in_ds(vals, dims): '{} is not a dimension in the dataset ' \ 'that can be used for selection.'.format(val) + if (selvals is not None) and (iselvals is not None): + duplicatedkeys = len(np.intersect1d(selvals.keys(), + iselvals.keys())) + assert len(duplicatedkeys) == 0, \ + 'Duplicated selection of variables {} was found! ' \ + 'Selection is ambiguous.'.format(duplicatedkeys) + test_vals_in_ds(selvals, ds.dims) test_vals_in_ds(iselvals, ds.dims) return # }}} -def ensure_list(alist): # {{{ +def _ensure_list(alist): # {{{ """ Ensure that variables used as a list are actually lists. - Phillip J. Wolfram - 09/08/2016 + Authors + ------- + Phillip J. Wolfram, Xylar Asay-Davis + + Last modified + ------------- + 02/10/2017 """ if isinstance(alist, str): @@ -141,286 +371,220 @@ def ensure_list(alist): # {{{ return alist # }}} -def get_datetimes(ds, timestr, yearoffset): # {{{ +def _parse_dataset_time(ds, inTimeVariableName, calendar, + simulationStartTime, outTimeVariableName, + referenceDate): # {{{ """ - Computes a list of datetimes from the time variable in the dataset ds with - variable name (or a list of 2 names) given by timestr, typically one of - 'daysSinceStartOfSim', 'xtime', or ['xtime_start', 'xtime_end']. - - The variable(s) pointed to by timestr should contain time information as a - date string, a floating-point number of days or a number of days - represented as a pandas timedelta (in ns). The result is a list of - datetimes corresponding to the input dates offset as appropriate by the - yearoffset. - + A helper function for computing a time coordinate from an MPAS time + variable. 
Given a data set and a time variable name (or tuple of 2 + time names), returns a new data set with time coordinate + `outTimeVariableName` filled with days since `referenceDate` + + Parameters + ---------- + ds : xarray.DataSet object + The data set containing an MPAS time variable to be used to build + an xarray time coordinate. + + inTimeVariableName : string or tuple or list of strings + The name of the time variable in the MPAS data set that will be + used to build the 'Time' coordinate. The array(s) named by + inTimeVariableName should contain date strings or the number of + days since the start of the simulation. Typically, + inTimeVariableName is one of {'daysSinceStartOfSim','xtime'}. + If a list of two variable + names is provided, times from the two are averaged together to + determine the value of the time coordinate. In such cases, + inTimeVariableName is typically {['xtime_start', 'xtime_end']}. + + calendar : {'gregorian', 'gregorian_noleap'} + The name of one of the calendars supported by MPAS cores + + + simulationStartTime : string + The start date of the simulation, used to convert from time variables + expressed as days since the start of the simulation to days since the + reference date. `simulationStartTime` takes one of the following + forms: + 0001-01-01 + + 0001-01-01 00:00:00 + + simulationStartTime is only required if the MPAS time variable + (identified by timeVariableName) is a number of days since the + start of the simulation. + + outTimeVariableName : string + The name of the coordinate to assign times to, typically 'Time'. + + referenceDate : string + The reference date for the time variable, typically '0001-01-01', + taking one of the following forms: + 0001-01-01 + + 0001-01-01 00:00:00 + + Returns + ------- + dataset : xarray.dataset object + A copy of the input data set with the `outTimeVariableName` + coordinate containing the time coordinate parsed from + `inTimeVariableName`. + + Raises + ------ + TypeError + If the time variable has an unsupported type (not a date string + or a floating-pont number of days since the start of the simulatio). + ValueError + If the time variable is a number of days since the start of the + simulation but simulationStartTime is None. 
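When a pair of time variables is supplied, the resulting 'Time' coordinate is the midpoint of the start and end values, both already expressed as days since the reference date; a toy illustration with arbitrary values:
```
import numpy as np

starts = np.array([0., 31., 59.])     # e.g. parsed from 'xtime_start'
ends = np.array([31., 59., 90.])      # e.g. parsed from 'xtime_end'
times = starts + 0.5*(ends - starts)  # midpoints become the 'Time' coordinate
```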
+ + Authors + ------- Xylar Asay-Davis - Last modified: 12/05/2016 + + Last modified + ------------- + 02/16/2017 """ - if isinstance(timestr, (tuple, list)): + if isinstance(inTimeVariableName, (tuple, list)): # we want to average the two - assert(len(timestr) == 2) - starts = get_datetimes(ds, timestr[0], yearoffset) - ends = get_datetimes(ds, timestr[1], yearoffset) - datetimes = [starts[i] + (ends[i] - starts[i])/2 - for i in range(len(starts))] - return datetimes - - time_var = ds[timestr] - - if time_var.dtype == '|S64': - # this is a variable like date strings like 'xtime' - time = [''.join(atime).strip() for atime in time_var.values] - datetimes = [datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]), - int(x[8:10]), int(x[11:13]), - int(x[14:16]), int(x[17:19])) - for x in time] - elif time_var.dtype == 'float64': - # this array contains floating-point days like 'daysSinceStartOfSim' - start = datetime.datetime(year=yearoffset+1, month=1, day=1) - datetimes = [start + datetime.timedelta(x) - for x in time_var.values] - elif time_var.dtype == 'timedelta64[ns]': - # this array contains a variable like 'daysSinceStartOfSim' as a - # timedelta64 - start = datetime.datetime(year=yearoffset+1, month=1, day=1) - datetimes = [start + x for x in - pd.to_timedelta(time_var.values, unit='ns')] - else: - raise TypeError("time_var of unsupported type {}".format( - time_var.dtype)) - - return datetimes # }}} - - -def map_variable(variable_name, ds, varmap): # {{{ - """ - Find the variable (or list of variables) in dataset ds that map to the - mpas_analysis variable given by variable_name. + assert(len(inTimeVariableName) == 2) + + dsStart = _parse_dataset_time( + ds=ds, + inTimeVariableName=inTimeVariableName[0], + calendar=calendar, + simulationStartTime=simulationStartTime, + outTimeVariableName=outTimeVariableName, + referenceDate=referenceDate) + dsEnd = _parse_dataset_time( + ds=ds, + inTimeVariableName=inTimeVariableName[1], + calendar=calendar, + simulationStartTime=simulationStartTime, + outTimeVariableName=outTimeVariableName, + referenceDate=referenceDate) + starts = dsStart[outTimeVariableName].values + ends = dsEnd[outTimeVariableName].values + + # replace the time in starts with the mean of starts and ends + dsOut = dsStart.copy() + + dsOut.coords['startTime'] = (outTimeVariableName, starts) + dsOut.coords['endTime'] = (outTimeVariableName, ends) + + dsOut.coords[outTimeVariableName] = (outTimeVariableName, + [starts[i] + + (ends[i] - starts[i])/2 + for i in range(len(starts))]) - varmap is a dictionary with keys that are variable names used by - MPAS-Analysis and values that are lists of possible names for the same - variable in the MPAS dycore that produced the data set (which may differ - between versions). + else: - Xylar Asay-Davis - 12/04/2016 - """ - possible_variables = varmap[variable_name] - for var in possible_variables: - if isinstance(var, (list, tuple)): - allFound = True - for subvar in var: - if subvar not in ds.data_vars.keys(): - allFound = False - break - if allFound: - return var - - elif var in ds.data_vars.keys(): - return var - - raise ValueError('Variable {} could not be mapped. None of the ' - 'possible mapping variables {}\n match any of the ' - 'variables in {}.'.format( - variable_name, possible_variables, - ds.data_vars.keys())) - # }}} - - -def rename_variables(ds, varmap, timestr): # {{{ - """ - Rename all variables in ds based on which are found in varmap. 
+ # there is just one time variable (either because we're recursively + # calling the function or because we're not averaging). + + # The contents of the time variable is expected to be either a string + # (|S64) or a float (meaning days since start of the simulation). + + timeVar = ds[inTimeVariableName] + + if timeVar.dtype == '|S64': + # this is an array of date strings like 'xtime' + # convert to string + timeStrings = [''.join(xtime).strip() for xtime in timeVar.values] + days = string_to_days_since_date(dateString=timeStrings, + referenceDate=referenceDate, + calendar=calendar) + + elif timeVar.dtype == 'float64': + # this array contains floating-point days like + # 'daysSinceStartOfSim' + + if simulationStartTime is None: + raise ValueError('MPAS time variable {} appears to be a number of days since start \n' + 'of sim but simulationStartTime was not supplied.'.format(inTimeVariableName)) + + if (string_to_datetime(referenceDate) == + string_to_datetime(simulationStartTime)): + days = timeVar.values + else: + # a conversion may be required + dates = days_to_datetime(days=timeVar.values, + referenceDate=simulationStartTime, + calendar=calendar) + days = datetime_to_days(dates=dates, + referenceDate=referenceDate, + calendar=calendar) + + elif timeVar.dtype == 'timedelta64[ns]': + raise TypeError('timeVar of unsupported type {}. This is likely because xarray.open_dataset \n' + 'was called with decode_times=True, which can mangle MPAS times.'.format(timeVar.dtype)) + else: + raise TypeError("timeVar of unsupported type {}".format( + timeVar.dtype)) - varmap is a dictionary with keys that are variable names used by - MPAS-Analysis and values that are lists of possible names for the same - variable in the MPAS dycore that produced the data set (which may differ - between versions). + dsOut = ds.copy() + dsOut.coords[outTimeVariableName] = (outTimeVariableName, days) - timestr is points to the time variable(s), which are treated as a special - case since they may need to be averaged. + return dsOut # }}} - Returns a new timestr after mapping in timestr is in varmap, otherwise - returns timestr unchanged. - Xylar Asay-Davis - 12/08/2016 +def process_chunking(ds, chunking): # {{{ """ + Computes chunking for a dataset. - submap = varmap - if timestr in varmap: - # make a copy of varmap and remove timestr - submap = varmap.copy() - submap.pop(timestr, None) + Parameters + ---------- + ds : ``xarray.Dataset`` + Input dataset to be chunked. - rename_dict = {} - for ds_var in ds.data_vars: - for map_var in submap: - rename_list = varmap[map_var] - if ds_var in rename_list: - rename_dict[ds_var] = map_var - break + chunking : None, int, dict + If chunking is an integer it specifies the maximum chunking rule, + otherwise if None do not perform chunking. If a chunking is a dict use + dictionary values for chunking. - ds.rename(rename_dict, inplace=True) + Returns + ------- + ds : ``xarray.Dataset`` - if timestr in varmap: - timestr = map_variable(timestr, ds, varmap) + Raises + ------ - return timestr # }}} + ValueError + If chunking value used is not an acceptable value. + Author + ------ + Phillip J. Wolfram -def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None, - timestr='xtime', yearoffset=1849, - varmap=None): # {{{ + Last modified + ------------- + 04/06/2017 """ - Builds correct time specification for MPAS, allowing a date offset because - the time must be between 1678 and 2262 based on the xarray library. 
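For the integer form of `chunking` accepted by the `process_chunking` helper added above, each dimension's chunk size is capped at that value; a rough, self-contained sketch of the same rule (the dataset and the limit of 1000 are made up):
```
import numpy as np
import xarray as xr

ds = xr.Dataset({'ssh': (('Time', 'nCells'), np.zeros((120, 10000)))}).chunk()
maxChunk = 1000
chunks = {name: min(maxChunk, int(np.asarray(ds.chunks[name]).max()))
          for name in ds.chunks}
ds = ds.chunk(chunks)  # roughly what process_chunking(ds, 1000) does
```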
- - The time specification is relevant for so-called time-slice model - experiments, in which CO2 and greenhouse gas conditions are kept - constant over the entire model simulation. Typical time-slice experiments - are run with 1850 (pre-industrial) conditions and 2000 (present-day) - conditions. Hence, a default date offset is chosen to be yearoffset=1849, - (year 0001 of an 1850 run will correspond with Jan 1st, 1850). - - The data set is assumed to have an array of date strings with variable - name (or list of 2 names) given by timestr, typically one of - 'daysSinceStartOfSim', 'xtime', or ['xtime_start', 'xtime_end']. - - The onlyvars option reduces the dataset to only include variables in the - onlyvars list. If onlyvars=None, include all dataset variables. - - iselvals and selvals provide index and value-based slicing operations for - individual datasets prior to their merge via xarray. - iselvals is a dictionary, e.g. iselvals = {'nVertLevels': slice(0, 3), - 'nCells': cellIDs} - selvals is a dictionary, e.g. selvals = {'cellLon': 180.0} - - varmap is an optional dictionary that can be used to rename - variables in the data set to standard names expected by mpas_analysis. - If timestr is present in varmap, the values of varmap[timestr] - will be used to determine the associated time variable in ds. However, the - variable(s) associated with timestr in ds will not be renamed. This is - because there may be more than one variable in ds that maps to timestr - (e.g. xtime_start and xtime_end), so that a one-to-one mapping is not - possible for this variable. - - Phillip J. Wolfram, Milena Veneziani, Luke van Roekel and Xylar Asay-Davis - Last modified: 12/05/2016 - """ - - if varmap is not None: - timestr = rename_variables(ds, varmap, timestr) - - datetimes = get_datetimes(ds, timestr, yearoffset) - - assert_valid_datetimes(datetimes, yearoffset) - - # append the corret time information - ds.coords['Time'] = datetimes - # record the yroffset - ds.attrs.__setitem__('time_yearoffset', str(yearoffset)) - - if onlyvars is not None: - ds = subset_variables(ds, ensure_list(onlyvars)) - - assert_valid_selections(ds, selvals, iselvals) - if selvals is not None: - ds = ds.sel(**selvals) + if isinstance(chunking, int): + chunks = {} + for name in ds.chunks.keys(): + chunklim = np.asarray(ds.chunks[name]).max() + chunks[name] = np.minimum(chunking, chunklim) - if iselvals is not None: - ds = ds.isel(**iselvals) - - return ds # }}} - - -def remove_repeated_time_index(ds): # {{{ - """ - Remove repeated times from xarray dataset. + ds = ds.chunk(chunks) - Phillip J. 
Wolfram - 12/01/2015 - """ - # get repeated indices - time = ds.Time.values - index = range(len(time)) - uniquetime = set() - remove = [] - for tid, atime in enumerate(time): - if atime not in uniquetime: - uniquetime.add(atime) - else: - remove.append(tid) + elif isinstance(chunking, dict): + ds = ds.chunk(chunking) - remove.reverse() - for tid in remove: - index.pop(tid) + # if chunking is None don't do any chunking + elif chunking is None: + pass - # remove repeated indices - ds = ds.isel(Time=index) + else: + raise ValueError( + 'Chunking parameter choice is not understood ' + 'for {} of type {}\n'.format(chunking, type(chunking))) return ds # }}} - - -def test_load_mpas_xarray_datasets(path): # {{{ - ds = xr.open_mfdataset(path, preprocess=lambda x: - preprocess_mpas(x, yearoffset=1850)) - ds = remove_repeated_time_index(ds) - - # make a simple plot from the data - ds.Time.plot() - plt.show() - - return # }}} - - -def test_load_mpas_xarray_timeSeriesStats_datasets(path): # {{{ - timestr = 'timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1' - ds = xr.open_mfdataset(path, preprocess=lambda x: - preprocess_mpas(x, - timeSeriesStats=True, - timestr=timestr)) - ds = remove_repeated_time_index(ds) - ds2 = xr.open_mfdataset(path, preprocess=lambda x: - preprocess_mpas(x, yearoffset=1850)) - ds2 = remove_repeated_time_index(ds2) - - # make a simple plot from the data - def plot_data(ds): - var = ds["timeSeriesStatsMonthly_avg_iceAreaCell_1"] - return var.where(var > 0).mean('nCells').plot() - - plot_data(ds) - plot_data(ds2) - plt.title("Curve centered around right times (b) \n " + - "Curve shifted towards end of avg period (g)") - plt.show() - - return # }}} - - -if __name__ == "__main__": - from optparse import OptionParser - - parser = OptionParser() - parser.add_option("-f", "--file", dest="inputfilename", - help="files to be opened with xarray, could be of form " - "'output*.nc'", - metavar="FILE") - parser.add_option("--istimeavg", dest="istimeavg", - help="option to use the preprocess for " - "timeSeriesStatsAM fields") - - options, args = parser.parse_args() - if not options.inputfilename: - parser.error("Input filename or expression ('-f') is a required" - "input, e.g. 
-f 'output*.npz'") - - if not options.istimeavg: - test_load_mpas_xarray_datasets(options.inputfilename) - else: - test_load_mpas_xarray_timeSeriesStats_datasets(options.inputfilename) - # vim: ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/plot/plotting.py b/mpas_analysis/shared/plot/plotting.py index ee72a1834..3636cc590 100644 --- a/mpas_analysis/shared/plot/plotting.py +++ b/mpas_analysis/shared/plot/plotting.py @@ -1,40 +1,562 @@ +""" +Plotting utilities, including routines for plotting: + * time series (and comparing with reference data sets) + * regridded horizontal fields (and comparing with reference data sets) + * vertical sections on native grid + * NINO34 time series and spectra + +Authors +------- +Xylar Asay-Davis, Milena Veneziani, Luke Van Roekel + +Last Modified +------------- +04/07/2017 +""" + import matplotlib.pyplot as plt +import matplotlib.colors as cols +import xarray as xr import pandas as pd from mpl_toolkits.basemap import Basemap import matplotlib.colors as cols +from matplotlib.ticker import FuncFormatter, FixedLocator import numpy as np +from functools import partial + +from ..timekeeping.utility import days_to_datetime, date_to_days + +from ..constants import constants + + +def nino34_spectra_plot(config, f, ninoSpectra, confidence95, confidence99, + redNoiseSpectra, fObs, f30, ninoObs, conf95Obs, conf99Obs, + redNoiseObs, nino30yr, conf9530, conf9930, redNoise30, + title, modelTitle, obsTitle, + fileout, linewidths, xlabel='Period (years)', + ylabel=r'Power ($^o$C / cycles mo$^{-1}$)', titleFontSize=None, + figsize=(9, 21), dpi=300): + """ + Plots the nino34 time series and power spectra in an image file + Parameters + ---------- + config : instance of ConfigParser + the configuration, containing a [plot] section with options that + control plotting + + f : numpy.array + periods to plot on x-axis + + ninoSpectra : xarray.dataArray object + nino34 power spectra + + confidence95 : numpy.array + 95% confidence level based on chi squared test + + confidence99 : numpy.array + 99% confidence level based on chi squared test + + redNoiseSpectra : numpy.array + red noise fit to the ninoSpectra + + fObs : numpy.array + periods to plot on x-axis for observations + + ninoObs : xarray.dataArray object + nino34 power spectra from the full observational record + + conf95Obs : numpy.array + 95% confidence level based on chi squared for observations + + conf99Obs : numpy.array + 99% confidence level based on chi squared for observations + + redNoiseObs : numpy.array + red noise fit to ninoObs + + nino30yr : xarray.dataArray object + power spectra of the last 30 years of the observational record + + title : str + the title of the plot + + modelTitle : str + the title of model panel + + obsTitle : str + the title of the obs panel + + xLabel, yLabel : str + axis labels + + fileout : str + the file name to be written + + linewidths : control line width + + titleFontSize : int, optional + the size of the title font + + figsize : tuple of float, optional + the size of the figure in inches + + dpi : int, optional + the number of dots per inch of the figure + + Author + ------ + Luke Van Roekel + + Last Modified + ------------- + 04/07/2017 + """ + + fig = plt.figure(figsize=figsize, dpi=dpi) + + if titleFontSize is None: + titleFontSize = config.get('plot', 'titleFontSize') + + axis_font = {'size': config.get('plot', 'axisFontSize')} + title_font = {'size': titleFontSize, + 'color': config.get('plot', 'titleFontColor'), + 'weight': config.get('plot', 
'titleFontWeight')} + if title is not None: + fig.suptitle(title, y=0.92, **title_font) + + ax1 = plt.subplot(3, 1, 1) + + plt.plot(fObs[2:-3], ninoObs[2:-3], 'k', linewidth=linewidths) + plt.plot(fObs[2:-3], redNoiseObs[2:-3], 'r', linewidth=linewidths) + plt.plot(fObs[2:-3], conf95Obs[2:-3], 'b', linewidth=linewidths) + plt.plot(fObs[2:-3], conf99Obs[2:-3], 'g', linewidth=linewidths) + plt.xlim(10, 1) + + plt.legend(['Nino34 spectra (Full Record)', 'Red noise fit', + '95% confidence threshold', '99% confidence threshold'], + loc='upper right') + maxObs = _plot_size_y_axis(plt, fObs, c1=conf99Obs, c2=redNoiseObs) + max30 = _plot_size_y_axis(plt, f30, c1=conf9930, c2=redNoise30) + maxModel = _plot_size_y_axis(plt, f, c1=ninoSpectra.values, c2=confidence99, + c3=redNoiseSpectra) + + maxYval = max(maxObs, max30, maxModel) + plt.ylim(0, 0.9*maxYval) + + if obsTitle is not None: + plt.title(obsTitle+' (Full Record)', **title_font) + if xlabel is not None: + plt.xlabel(xlabel, **axis_font) + if ylabel is not None: + plt.ylabel(ylabel, **axis_font) + + ax2 = plt.subplot(3, 1, 2) + + plt.plot(f30[2:-3], nino30yr[2:-3], 'k', linewidth=linewidths) + plt.plot(f30[2:-3], redNoise30[2:-3], 'r', linewidth=linewidths) + plt.plot(f30[2:-3], conf9530[2:-3], 'b', linewidth=linewidths) + plt.plot(f30[2:-3], conf9930[2:-3], 'g', linewidth=linewidths) + plt.xlim(10, 1) + plt.ylim(0, 0.9*maxYval) + + plt.legend(['Nino34 spectra (1976 - 2016)', 'Red noise fit', + '95% confidence threshold', '99% confidence threshold'], + loc='upper right') + + if obsTitle is not None: + plt.title(obsTitle+' (1976-2016)', **title_font) + if xlabel is not None: + plt.xlabel(xlabel, **axis_font) + if ylabel is not None: + plt.ylabel(ylabel, **axis_font) + + ax3 = plt.subplot(3, 1, 3) + plt.plot(f[2:-3], ninoSpectra[2:-3], 'k', linewidth=linewidths) + plt.plot(f[2:-3], redNoiseSpectra[2:-3], 'r', linewidth=linewidths) + plt.plot(f[2:-3], confidence95[2:-3], 'b', linewidth=linewidths) + plt.plot(f[2:-3], confidence99[2:-3], 'g', linewidth=linewidths) + plt.xlim(10, 1) + plt.ylim(0, 0.9*maxYval) + + # add legend + plt.legend(['Nino34 index spectra', 'Red noise fit', + '95% confidence threshold', '99% confidence threshold'], + loc='upper right') + + if modelTitle is not None: + plt.title(modelTitle, **title_font) + if xlabel is not None: + plt.xlabel(xlabel, **axis_font) + if ylabel is not None: + plt.ylabel(ylabel, **axis_font) + if fileout is not None: + fig.savefig(fileout, dpi=dpi, bbox_inches='tight', pad_inches=0.1) + + if not config.getboolean('plot', 'displayToScreen'): + plt.close() + + +def nino34_timeseries_plot(config, nino34Index, nino34Obs, nino3430, title, + modelTitle, obsTitle, fileout, linewidths, + calendar, xlabel='Time [years]', ylabel='[$^\circ$C]', + titleFontSize=None, figsize=(12, 28), dpi=300, + maxXTicks=20): + """ + Plots the nino34 time series and power spectra in an image file + + Parameters + ---------- + config : instance of ConfigParser + the configuration, containing a [plot] section with options that + control plotting + + nino34Index : xarray.dataArray + nino34 timeseries to plot + + nino34Obs : xarray.dataArray + nino34 observation + + nino3430 : xarray.dataArray + subset of nino34 observations + + title : str + the title of the plot + + obsTitle : str + title of observational plot + + modelTitle : str + title of model plot + + xLabel, yLabel : str + axis labels + + fileout : str + the file name to be written + + lineWidths : list of str + control line width + + titleFontSize : int, optional + the 
size of the title font -def timeseries_analysis_plot(config, dsvalues, N, title, xlabel, ylabel, fileout, - lineStyles, lineWidths, title_font_size=None, - figsize=(15,6), dpi=300): + figsize : tuple of float, optional + the size of the figure in inches + dpi : int, optional + the number of dots per inch of the figure + + maxXTicks : int, optional + the maximum number of tick marks that will be allowed along the x axis. + This may need to be adjusted depending on the figure size and aspect + ratio. + + Author + ------ + Luke Van Roekel + + Last Modified + ------------- + 04/07/2017 + """ + fig = plt.figure(figsize=figsize, dpi=dpi) + + if titleFontSize is None: + titleFontSize = config.get('plot', 'titleFontSize') + + axis_font = {'size': config.get('plot', 'axisFontSize')} + title_font = {'size': titleFontSize, + 'color': config.get('plot', 'titleFontColor'), + 'weight': config.get('plot', 'titleFontWeight')} + if title is not None: + fig.suptitle(title, y=0.92, **title_font) + + # Plot Nino34 Observation Time series + plt.subplot(3, 1, 1) + _plot_nino_timeseries(plt, nino34Obs[2:-3].values, nino34Obs.Time[2:-3].values, + xlabel, ylabel, obsTitle+' (Full Record)', calendar, + axis_font, linewidths, maxXTicks) + + # Plot subset of the observational data set + plt.subplot(3, 1, 2) + _plot_nino_timeseries(plt, nino3430.values, nino3430.Time.values, + xlabel, ylabel, obsTitle+' (1976 - 2016)', calendar, + axis_font, linewidths, maxXTicks) + + # Plot Nino34 model time series + plt.subplot(3, 1, 3) + _plot_nino_timeseries(plt, nino34Index[2:-3].values, nino34Index.Time[2:-3].values, + xlabel, ylabel, modelTitle, calendar, axis_font, linewidths, + maxXTicks) + minDays = nino34Index.Time[2:-3].values.min() + maxDays = nino34Index.Time[2:-3].values.max() + + _plot_xtick_format(plt, calendar, minDays, maxDays, maxXTicks) + + if fileout is not None: + plt.savefig(fileout, dpi=dpi, bbox_inches='tight', pad_inches=0.1) + + if not config.getboolean('plot', 'displayToScreen'): + plt.close() + + +def _plot_nino_timeseries(plt, ninoIndex, time, xlabel, ylabel, + panelTitle, calendar, axis_font, linewidths, + maxXTicks): + ''' + Plot the nino time series on a subplot + + Parameters + ---------- + ninoIndex : numpy.array + nino34 Index values (can be obs or model) + + time : numpy.array + time values for the nino index + + calendar : specified calendar for the plot + + maxXTicks : int, optional + the maximum number of tick marks that will be allowed along the x axis. + This may need to be adjusted depending on the figure size and aspect + ratio. 
+ + panelTitle : string + string to label the subplot with + + xlabel : string + string for x-axis label + + ylabel : string + string for y-axis label + + Author + ------ + Luke Van Roekel + + Last Modified + ------------- + 04/07/2017 + ''' + plt.title(panelTitle, y=1.06, **axis_font) + y1 = ninoIndex + nt = np.size(ninoIndex) + + y2 = np.zeros(nt) + + plt.plot(time, 0.4*np.ones(nt), '--k', + linewidth=linewidths) + plt.plot(time, -0.4*np.ones(nt), '--k', + linewidth=linewidths) + plt.fill_between(time, y1, y2, where=y1 > y2, + facecolor='red', interpolate=True, linewidth=0) + plt.fill_between(time, y1, y2, where=y1 < y2, + facecolor='blue', interpolate=True, linewidth=0) + + if xlabel is not None: + plt.xlabel(xlabel, **axis_font) + if ylabel is not None: + plt.ylabel(ylabel, **axis_font) + + +def timeseries_analysis_plot(config, dsvalues, N, title, xlabel, ylabel, + fileout, lineStyles, lineWidths, calendar, + titleFontSize=None, figsize=(15, 6), dpi=300, + maxXTicks=20): + + """ + Plots the list of time series data sets and stores the result in an image + file. + + Parameters + ---------- + config : instance of ConfigParser + the configuration, containing a [plot] section with options that + control plotting + + dsvalues : list of xarray DataSets + the data set(s) to be plotted + + N : int + the numer of time points over which to perform a moving average + + title : str + the title of the plot + + xlabel, ylabel : str + axis labels + + fileout : str + the file name to be written + + lineStyles, lineWidths : list of str + control line style/width + + titleFontSize : int, optional + the size of the title font + + figsize : tuple of float, optional + the size of the figure in inches + + dpi : int, optional + the number of dots per inch of the figure + + maxXTicks : int, optional + the maximum number of tick marks that will be allowed along the x axis. + This may need to be adjusted depending on the figure size and aspect + ratio. 
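The smoothing that `timeseries_analysis_plot` applies before plotting is an N-point centered moving average done through pandas; a self-contained sketch with made-up monthly data (the 30.4375-day spacing is only an illustrative mean month length):
```
import numpy as np
import pandas as pd
import xarray as xr

N = 12
da = xr.DataArray(np.random.rand(120),
                  coords={'Time': np.arange(120)*30.4375}, dims='Time')
mean = pd.Series.rolling(da.to_pandas(), N, center=True).mean()
mean = xr.DataArray.from_series(mean)  # NaN at the ends where the window is incomplete
```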
+ + Authors + ------- + Xylar Asay-Davis, Milena Veneziani + + Last Modified + ------------- + 03/14/2017 + """ plt.figure(figsize=figsize, dpi=dpi) + minDays = [] + maxDays = [] for dsIndex in range(len(dsvalues)): dsvalue = dsvalues[dsIndex] if dsvalue is None: continue mean = pd.Series.rolling(dsvalue.to_pandas(), N, center=True).mean() - mean.plot(style=lineStyles[dsIndex], lw=lineWidths[dsIndex]) + mean = xr.DataArray.from_series(mean) + minDays.append(mean.Time.min()) + maxDays.append(mean.Time.max()) + plt.plot(mean['Time'], mean, + lineStyles[dsIndex], + linewidth=lineWidths[dsIndex]) + ax = plt.gca() + # Add a y=0 line if y ranges between positive and negative values + yaxLimits = ax.get_ylim() + if yaxLimits[0]*yaxLimits[1] < 0: + indgood = np.where(np.logical_not(np.isnan(mean))) + x = mean['Time'][indgood] + plt.plot(x, np.zeros(np.size(x)), 'k-', linewidth=1.2) + + _plot_xtick_format(plt, calendar, minDays, maxDays, maxXTicks) - if title_font_size is None: - title_font_size = config.get('plot', 'title_font_size') + if titleFontSize is None: + titleFontSize = config.get('plot', 'titleFontSize') - axis_font = {'size':config.get('plot', 'axis_font_size')} - title_font = {'size': title_font_size, - 'color':config.get('plot', 'title_font_color'), - 'weight':config.get('plot', 'title_font_weight')} - if (title != None): + axis_font = {'size': config.get('plot', 'axisFontSize')} + title_font = {'size': titleFontSize, + 'color': config.get('plot', 'titleFontColor'), + 'weight': config.get('plot', 'titleFontWeight')} + if title is not None: plt.title(title, **title_font) - if (xlabel != None): + if xlabel is not None: plt.xlabel(xlabel, **axis_font) - if (ylabel != None): + if ylabel is not None: plt.ylabel(ylabel, **axis_font) - if (fileout is not None): - plt.savefig(fileout,dpi=dpi,bbox_inches='tight',pad_inches=0.1) + if fileout is not None: + plt.savefig(fileout, dpi=dpi, bbox_inches='tight', pad_inches=0.1) + + if not config.getboolean('plot', 'displayToScreen'): + plt.close() + + +def timeseries_analysis_plot_polar(config, dsvalues, N, title, + fileout, lineStyles, lineWidths, + calendar, titleFontSize=None, + figsize=(15, 6), dpi=300): + + """ + Plots the list of time series data sets on a polar plot and stores + the result in an image file. + + Parameters + ---------- + config : instance of ConfigParser + the configuration, containing a [plot] section with options that + control plotting + + dsvalues : list of xarray DataSets + the data set(s) to be plotted + + N : int + the numer of time points over which to perform a moving average + + title : str + the title of the plot + + fileout : str + the file name to be written + + lineStyles, lineWidths : list of str + control line style/width + + titleFontSize : int, optional + the size of the title font + + figsize : tuple of float, optional + the size of the figure in inches + + dpi : int, optional + the number of dots per inch of the figure + + Authors + ------- + Adrian K. 
Turner + + Last Modified + ------------- + 03/15/2017 + """ + plt.figure(figsize=figsize, dpi=dpi) + + minDays = [] + maxDays = [] + for dsIndex in range(len(dsvalues)): + dsvalue = dsvalues[dsIndex] + if dsvalue is None: + continue + mean = pd.Series.rolling(dsvalue.to_pandas(), N, center=True).mean() + mean = xr.DataArray.from_series(mean) + minDays.append(mean.Time.min()) + maxDays.append(mean.Time.max()) + plt.polar((mean['Time']/365.0)*np.pi*2.0, mean, + lineStyles[dsIndex], + linewidth=lineWidths[dsIndex]) + + ax = plt.gca() - if not config.getboolean('plot','displayToScreen'): - plt.close() + # set azimuthal axis formatting + majorTickLocs = np.zeros(12) + minorTickLocs = np.zeros(12) + majorTickLocs[0] = 0.0 + minorTickLocs[0] = (constants.daysInMonth[0] * np.pi) / 365.0 + for month in range(1, 12): + majorTickLocs[month] = majorTickLocs[month-1] + \ + ((constants.daysInMonth[month-1] * np.pi * 2.0) / 365.0) + minorTickLocs[month] = minorTickLocs[month-1] + \ + (((constants.daysInMonth[month-1] + \ + constants.daysInMonth[month]) * np.pi) / 365.0) + + ax.set_xticks(majorTickLocs) + ax.set_xticklabels([]) + + ax.set_xticks(minorTickLocs, minor=True) + ax.set_xticklabels(constants.abrevMonthNames, minor=True) + + if titleFontSize is None: + titleFontSize = config.get('plot', 'titleFontSize') + + axis_font = {'size': config.get('plot', 'axisFontSize')} + title_font = {'size': titleFontSize, + 'color': config.get('plot', 'titleFontColor'), + 'weight': config.get('plot', 'titleFontWeight')} + if title is not None: + plt.title(title, **title_font) + + if fileout is not None: + plt.savefig(fileout, dpi=dpi, bbox_inches='tight', pad_inches=0.1) + + if not config.getboolean('plot', 'displayToScreen'): + plt.close() def plot_polar_comparison( @@ -49,77 +571,151 @@ def plot_polar_comparison( cmapDiff, clevsDiff, fileout, - title = None, - plotProjection = "npstere", - latmin = 50.0, - lon0 = 0, - modelTitle = "Model", - obsTitle = "Observations", - diffTitle = "Model-Observations", - cbarlabel = "units", - title_font_size = None, - figsize = (8,22), - dpi = 300): + title=None, + plotProjection='npstere', + latmin=50.0, + lon0=0, + modelTitle='Model', + obsTitle='Observations', + diffTitle='Model-Observations', + cbarlabel='units', + titleFontSize=None, + figsize=(8, 22), + dpi=300): + + """ + Plots a data set around either the north or south pole. 
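The azimuthal month ticks used by `timeseries_analysis_plot_polar` above amount to cumulative month lengths mapped onto the circle; a sketch assuming `constants.daysInMonth` holds the usual non-leap month lengths:
```
import numpy as np

daysInMonth = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])
monthStart = np.concatenate(([0.], np.cumsum(daysInMonth[:-1])))
majorTickLocs = monthStart * 2.*np.pi/365.                      # month boundaries
minorTickLocs = (monthStart + 0.5*daysInMonth) * 2.*np.pi/365.  # mid-month label angles
```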
+ + Parameters + ---------- + config : instance of ConfigParser + the configuration, containing a [plot] section with options that + control plotting + + Lons, Lats : float arrays + longitude and latitude arrays + + modelArray, obsArray : float arrays + model and observational data sets + + diffArray : float array + difference between modelArray and obsArray + + cmapModelObs : str + colormap of model and observations panel + + clevsModelObs : int array + colorbar values for model and observations panel + + cmapDiff : str + colormap of difference (bias) panel + + clevsDiff : int array + colorbar values for difference (bias) panel + + fileout : str + the file name to be written + + title : str, optional + the subtitle of the plot + + plotProjection : str, optional + Basemap projection for the plot + + modelTitle : str, optional + title of the model panel + + obsTitle : str, optional + title of the observations panel + + diffTitle : str, optional + title of the difference (bias) panel + + cbarlabel : str, optional + label on the colorbar + + titleFontSize : int, optional + size of the title font + + figsize : tuple of float, optional + the size of the figure in inches + + dpi : int, optional + the number of dots per inch of the figure + + Authors + ------- + Xylar Asay-Davis, Milena Veneziani + + Last Modified + ------------- + 03/17/2017 + """ # set up figure fig = plt.figure(figsize=figsize, dpi=dpi) if (title is not None): - if title_font_size is None: - title_font_size = config.get('plot', 'title_font_size') - title_font = {'size': title_font_size, - 'color':config.get('plot', 'title_font_color'), - 'weight':config.get('plot', 'title_font_weight')} + if titleFontSize is None: + titleFontSize = config.get('plot', 'titleFontSize') + title_font = {'size': titleFontSize, + 'color': config.get('plot', 'titleFontColor'), + 'weight': config.get('plot', 'titleFontWeight')} fig.suptitle(title, y=0.95, **title_font) - axis_font = {'size':config.get('plot', 'axis_font_size')} + axis_font = {'size': config.get('plot', 'axisFontSize')} - m = Basemap(projection=plotProjection,boundinglat=latmin,lon_0=lon0,resolution='l') - x, y = m(Lons, Lats) # compute map proj coordinates + m = Basemap(projection=plotProjection, boundinglat=latmin, + lon_0=lon0, resolution='l') + x, y = m(Lons, Lats) # compute map proj coordinates normModelObs = cols.BoundaryNorm(clevsModelObs, cmapModelObs.N) normDiff = cols.BoundaryNorm(clevsDiff, cmapDiff.N) - plt.subplot(3,1,1) + plt.subplot(3, 1, 1) plt.title(modelTitle, y=1.06, **axis_font) m.drawcoastlines() - m.fillcontinents(color='grey',lake_color='white') - m.drawparallels(np.arange(-80.,81.,10.)) - m.drawmeridians(np.arange(-180.,181.,20.),labels=[True,True,True,True]) - cs = m.contourf(x,y,modelArray,cmap=cmapModelObs,norm=normModelObs,spacing='uniform',levels=clevsModelObs) - cbar = m.colorbar(cs,location='right',pad="15%",spacing='uniform',ticks=clevsModelObs,boundaries=clevsModelObs) - #cbar = m.colorbar(cs,location='right',pad="15%",spacing='uniform',extendfrac='auto', - # extendrect='True',ticks=clevsModelObs, boundaries=clevsModelObs) + m.fillcontinents(color='grey', lake_color='white') + m.drawparallels(np.arange(-80., 81., 10.)) + m.drawmeridians(np.arange(-180., 181., 20.), + labels=[True, True, True, True]) + cs = m.contourf(x, y, modelArray, cmap=cmapModelObs, norm=normModelObs, + spacing='uniform', levels=clevsModelObs) + cbar = m.colorbar(cs, location='right', pad="15%", spacing='uniform', + ticks=clevsModelObs, boundaries=clevsModelObs) 
cbar.set_label(cbarlabel) - plt.subplot(3,1,2) + plt.subplot(3, 1, 2) plt.title(obsTitle, y=1.06, **axis_font) m.drawcoastlines() - m.fillcontinents(color='grey',lake_color='white') - m.drawparallels(np.arange(-80.,81.,10.)) - m.drawmeridians(np.arange(-180.,181.,20.),labels=[True,True,True,True]) - cs = m.contourf(x,y,obsArray,cmap=cmapModelObs,norm=normModelObs,spacing='uniform',levels=clevsModelObs) - cbar = m.colorbar(cs,location='right',pad="15%",spacing='uniform',ticks=clevsModelObs,boundaries=clevsModelObs) - #cbar = m.colorbar(cs,location='right',pad="15%",spacing='uniform',extendfrac='auto', - # extendrect='True',ticks=clevsModelObs, boundaries=clevsModelObs) + m.fillcontinents(color='grey', lake_color='white') + m.drawparallels(np.arange(-80., 81., 10.)) + m.drawmeridians(np.arange(-180., 181., 20.), + labels=[True, True, True, True]) + cs = m.contourf(x, y, obsArray, cmap=cmapModelObs, norm=normModelObs, + spacing='uniform', levels=clevsModelObs) + cbar = m.colorbar(cs, location='right', pad="15%", spacing='uniform', + ticks=clevsModelObs, boundaries=clevsModelObs) cbar.set_label(cbarlabel) - plt.subplot(3,1,3) + plt.subplot(3, 1, 3) plt.title(diffTitle, y=1.06, **axis_font) m.drawcoastlines() - m.fillcontinents(color='grey',lake_color='white') - m.drawparallels(np.arange(-80.,81.,10.)) - m.drawmeridians(np.arange(-180.,181.,20.),labels=[True,True,True,True]) - cs = m.contourf(x,y,diffArray,cmap=cmapDiff,norm=normDiff,spacing='uniform',levels=clevsDiff) - cbar = m.colorbar(cs,location='right',pad="15%",spacing='uniform',ticks=clevsDiff,boundaries=clevsModelObs) - #cbar = m.colorbar(cs,location='right',pad="15%",spacing='uniform',extendfrac='auto', - # extendrect='True',ticks=clevsDiff, boundaries=clevsDiff) + m.fillcontinents(color='grey', lake_color='white') + m.drawparallels(np.arange(-80., 81., 10.)) + m.drawmeridians(np.arange(-180., 181., 20.), + labels=[True, True, True, True]) + cs = m.contourf(x, y, diffArray, cmap=cmapDiff, norm=normDiff, + spacing='uniform', levels=clevsDiff) + cbar = m.colorbar(cs, location='right', pad="15%", spacing='uniform', + ticks=clevsDiff, boundaries=clevsModelObs) cbar.set_label(cbarlabel) if (fileout is not None): - plt.savefig(fileout,dpi=dpi,bbox_inches='tight',pad_inches=0.1) + plt.savefig(fileout, dpi=dpi, bbox_inches='tight', pad_inches=0.1) - if not config.getboolean('plot','displayToScreen'): + if not config.getboolean('plot', 'displayToScreen'): plt.close() + def plot_global_comparison( config, Lons, @@ -132,73 +728,406 @@ def plot_global_comparison( cmapDiff, clevsDiff, fileout, - title = None, - modelTitle = "Model", - obsTitle = "Observations", - diffTitle = "Model-Observations", - cbarlabel = "units", - title_font_size = None, - figsize = (8,12), - dpi = 300): + title=None, + modelTitle='Model', + obsTitle='Observations', + diffTitle='Model-Observations', + cbarlabel='units', + titleFontSize=None, + figsize=(8, 12), + dpi=300): + + """ + Plots a data set as a longitude/latitude map. 
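A hypothetical call to `plot_global_comparison` as documented below; `config` is assumed to be the analysis configuration object with a `[plot]` section, and the arrays, contour levels and colormaps are made up:
```
# illustrative only: config, arrays, levels and colormaps are assumptions
import numpy as np
import matplotlib.pyplot as plt

lons = np.arange(-180., 181., 1.0)
lats = np.arange(-85., 87., 1.0)
Lons, Lats = np.meshgrid(lons, lats)
modelArray = np.random.rand(*Lons.shape)
obsArray = np.random.rand(*Lons.shape)

plot_global_comparison(config, Lons, Lats, modelArray, obsArray,
                       modelArray - obsArray,
                       plt.get_cmap('viridis'), np.linspace(0., 1., 11),
                       plt.get_cmap('RdBu_r'), np.linspace(-1., 1., 11),
                       'sst_global.png', title='SST comparison',
                       cbarlabel=r'$^\circ$C')
```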
+ + Parameters + ---------- + config : instance of ConfigParser + the configuration, containing a [plot] section with options that + control plotting + + Lons, Lats : float arrays + longitude and latitude arrays + + modelArray, obsArray : float arrays + model and observational data sets + + diffArray : float array + difference between modelArray and obsArray + + cmapModelObs : str + colormap of model and observations panel + + clevsModelObs : int array + colorbar values for model and observations panel + + cmapDiff : str + colormap of difference (bias) panel + + clevsDiff : int array + colorbar values for difference (bias) panel + + fileout : str + the file name to be written + + title : str, optional + the subtitle of the plot + + modelTitle : str, optional + title of the model panel + + obsTitle : str, optional + title of the observations panel + + diffTitle : str, optional + title of the difference (bias) panel + + cbarlabel : str, optional + label on the colorbar + + titleFontSize : int, optional + size of the title font + + figsize : tuple of float, optional + the size of the figure in inches + + dpi : int, optional + the number of dots per inch of the figure + + Authors + ------- + Xylar Asay-Davis, Milena Veneziani + + Last Modified + ------------- + 03/13/2017 + """ # set up figure fig = plt.figure(figsize=figsize, dpi=dpi) if (title is not None): - if title_font_size is None: - title_font_size = config.get('plot', 'title_font_size') - title_font = {'size': title_font_size, - 'color':config.get('plot', 'title_font_color'), - 'weight':config.get('plot', 'title_font_weight')} + if titleFontSize is None: + titleFontSize = config.get('plot', 'titleFontSize') + title_font = {'size': titleFontSize, + 'color': config.get('plot', 'titleFontColor'), + 'weight': config.get('plot', 'titleFontWeight')} fig.suptitle(title, y=0.95, **title_font) - axis_font = {'size':config.get('plot', 'axis_font_size')} - - m = Basemap(projection='cyl',llcrnrlat=-85,urcrnrlat=86,llcrnrlon=-180,urcrnrlon=181,resolution='l') - #m = Basemap(projection='robin',lon_0=200,resolution='l') # this doesn't work because lons are -180 to 180.. 
- x, y = m(Lons, Lats) # compute map proj coordinates + axis_font = {'size': config.get('plot', 'axisFontSize')} + + m = Basemap(projection='cyl', llcrnrlat=-85, urcrnrlat=86, llcrnrlon=-180, + urcrnrlon=181, resolution='l') + x, y = m(Lons, Lats) # compute map proj coordinates normModelObs = cols.BoundaryNorm(clevsModelObs, cmapModelObs.N) normDiff = cols.BoundaryNorm(clevsDiff, cmapDiff.N) - - plt.subplot(3,1,1) + + plt.subplot(3, 1, 1) plt.title(modelTitle, y=1.06, **axis_font) m.drawcoastlines() - m.fillcontinents(color='grey',lake_color='white') - m.drawparallels(np.arange(-80.,80.,20.),labels=[True,False,False,False]) - m.drawmeridians(np.arange(-180.,180.,60.),labels=[False,False,False,True]) - cs = m.contourf(x,y,modelArray,cmap=cmapModelObs,norm=normModelObs,spacing='uniform',levels=clevsModelObs,extend='both') - cbar = m.colorbar(cs,location='right',pad="5%",spacing='uniform',ticks=clevsModelObs,boundaries=clevsModelObs) - #cbar = m.colorbar(cs,location='right',pad="5%",spacing='uniform',extendfrac='auto', - # extendrect='True',ticks=clevsModelObs, boundaries=clevsModelObs) + m.fillcontinents(color='grey', lake_color='white') + m.drawparallels(np.arange(-80., 80., 20.), + labels=[True, False, False, False]) + m.drawmeridians(np.arange(-180., 180., 60.), + labels=[False, False, False, True]) + cs = m.contourf(x, y, modelArray, cmap=cmapModelObs, norm=normModelObs, + spacing='uniform', levels=clevsModelObs, extend='both') + cbar = m.colorbar(cs, location='right', pad="5%", spacing='uniform', + ticks=clevsModelObs, boundaries=clevsModelObs) cbar.set_label(cbarlabel) - plt.subplot(3,1,2) + plt.subplot(3, 1, 2) plt.title(obsTitle, y=1.06, **axis_font) m.drawcoastlines() - m.fillcontinents(color='grey',lake_color='white') - m.drawparallels(np.arange(-80.,80.,20.),labels=[True,False,False,False]) - m.drawmeridians(np.arange(-180.,180.,40.),labels=[False,False,False,True]) - cs = m.contourf(x,y,obsArray,cmap=cmapModelObs,norm=normModelObs,spacing='uniform',levels=clevsModelObs,extend='both') - cbar = m.colorbar(cs,location='right',pad="5%",spacing='uniform',ticks=clevsModelObs,boundaries=clevsModelObs) - #cbar = m.colorbar(cs,location='right',pad="5%",spacing='uniform',extendfrac='auto', - # extendrect='True',ticks=clevsModelObs, boundaries=clevsModelObs) + m.fillcontinents(color='grey', lake_color='white') + m.drawparallels(np.arange(-80., 80., 20.), + labels=[True, False, False, False]) + m.drawmeridians(np.arange(-180., 180., 40.), + labels=[False, False, False, True]) + cs = m.contourf(x, y, obsArray, cmap=cmapModelObs, norm=normModelObs, + spacing='uniform', levels=clevsModelObs, extend='both') + cbar = m.colorbar(cs, location='right', pad="5%", spacing='uniform', + ticks=clevsModelObs, boundaries=clevsModelObs) cbar.set_label(cbarlabel) - - plt.subplot(3,1,3) + + plt.subplot(3, 1, 3) plt.title(diffTitle, y=1.06, **axis_font) m.drawcoastlines() - m.fillcontinents(color='grey',lake_color='white') - m.drawparallels(np.arange(-80.,80.,20.),labels=[True,False,False,False]) - m.drawmeridians(np.arange(-180.,180.,40.),labels=[False,False,False,True]) - cs = m.contourf(x,y,diffArray,cmap=cmapDiff,norm=normDiff,spacing='uniform',levels=clevsDiff,extend='both') - cbar = m.colorbar(cs,location='right',pad="5%",spacing='uniform',ticks=clevsDiff,boundaries=clevsModelObs) - #cbar = m.colorbar(cs,location='right',pad="5%",spacing='uniform',extendfrac='auto', - # extendrect='True',ticks=clevsDiff, boundaries=clevsDiff) - cs.cmap.set_over((1., 1., 1.)) - cs.cmap.set_under((0., 0., 0.)) + 
m.fillcontinents(color='grey', lake_color='white') + m.drawparallels(np.arange(-80., 80., 20.), + labels=[True, False, False, False]) + m.drawmeridians(np.arange(-180., 180., 40.), + labels=[False, False, False, True]) + cs = m.contourf(x, y, diffArray, cmap=cmapDiff, norm=normDiff, + spacing='uniform', levels=clevsDiff, extend='both') + cbar = m.colorbar(cs, location='right', pad="5%", spacing='uniform', + ticks=clevsDiff, boundaries=clevsModelObs) cbar.set_label(cbarlabel) if (fileout is not None): - plt.savefig(fileout,dpi=dpi,bbox_inches='tight',pad_inches=0.1) + plt.savefig(fileout, dpi=dpi, bbox_inches='tight', pad_inches=0.1) - if not config.getboolean('plot','displayToScreen'): + if not config.getboolean('plot', 'displayToScreen'): plt.close() + + +def _date_tick(days, pos, calendar='gregorian', includeMonth=True): + days = np.maximum(days, 0.) + date = days_to_datetime(days, calendar) + if includeMonth: + return '{:04d}-{:02d}'.format(date.year, date.month) + else: + return '{:04d}'.format(date.year) + + +def plot_vertical_section( + config, + xArray, + depthArray, + fieldArray, + colormapName, + colorbarLevels, + contourLevels, + colorbarLabel=None, + title=None, + xlabel=None, + ylabel=None, + fileout='moc.png', + figsize=(10, 4), + dpi=300): # {{{ + + """ + Plots a data set as a x distance (latitude, longitude, + or spherical distance) vs depth map (vertical section). + + Parameters + ---------- + config : instance of ConfigParser + the configuration, containing a [plot] section with options that + control plotting + + xArray : float array + x array (latitude, longitude, or spherical distance) + + depthArray : float array + depth array [m] + + fieldArray : float array + field array to plot + + colormapName : str + colormap of plot + + colorbarLevels : int array + colorbar levels of plot + + contourLevels : int levels + levels of contours to be drawn + + colorbarLabel : str, optional + label of the colorbar + + title : str, optional + title of plot + + xlabel, ylabel : str, optional + label of x- and y-axis + + fileout : str, optional + the file name to be written + + figsize : tuple of float, optional + size of the figure in inches + + dpi : int, optional + number of dots per inch of the figure + + Authors + ------- + Milena Veneziani, Mark Petersen + + Last Modified + ------------- + 03/13/2017 + """ + + # set up figure + fig = plt.figure(figsize=figsize, dpi=dpi) + + x, y = np.meshgrid(xArray, depthArray) # change to zMid + + normModelObs = cols.BoundaryNorm(colorbarLevels, colormapName.N) + + cs = plt.contourf(x, y, fieldArray, cmap=colormapName, norm=normModelObs, + spacing='uniform', levels=colorbarLevels, extend='both') + plt.contour(x, y, fieldArray, levels=contourLevels[::2], colors='k') + + cbar = plt.colorbar(cs, orientation='vertical', spacing='uniform', + ticks=colorbarLevels, boundaries=colorbarLevels) + if colorbarLabel is not None: + cbar.set_label(colorbarLabel) + + axis_font = {'size': config.get('plot', 'axisFontSize')} + title_font = {'size': config.get('plot', 'titleFontSize'), + 'color': config.get('plot', 'titleFontColor'), + 'weight': config.get('plot', 'titleFontWeight')} + if title is not None: + plt.title(title, **title_font) + if xlabel is not None: + plt.xlabel(xlabel, **axis_font) + if ylabel is not None: + plt.ylabel(ylabel, **axis_font) + + plt.gca().invert_yaxis() + + if (fileout is not None): + plt.savefig(fileout, dpi=dpi, bbox_inches='tight', pad_inches=0.1) + + if not config.getboolean('plot', 'displayToScreen'): + plt.close() + + return # 
}}} + + +def setup_colormap(config, configSectionName, suffix=''): + + ''' + Set up a colormap from the registry + + Parameters + ---------- + config : instance of ConfigParser + the configuration, containing a [plot] section with options that + control plotting + + configSectionName : str + name of config section + + suffix: str, optional + suffix of colormap related options + + Returns + ------- + colormap : srt + new colormap + + colorbarLevels : int array + colorbar levels + + Authors + ------- + Xylar Asay-Davis, Milena Veneziani + + Last modified + ------------- + 03/17/2017 + ''' + + colormap = plt.get_cmap(config.get(configSectionName, + 'colormapName{}'.format(suffix))) + indices = config.getExpression(configSectionName, + 'colormapIndices{}'.format(suffix)) + colorbarLevels = config.getExpression(configSectionName, + 'colorbarLevels{}'.format(suffix)) + + # set under/over values based on the first/last indices in the colormap + underColor = colormap(indices[0]) + overColor = colormap(indices[-1]) + if len(colorbarLevels)+1 == len(indices): + # we have 2 extra values for the under/over so make the colormap + # without these values + indices = indices[1:-1] + colormap = cols.ListedColormap(colormap(indices), + 'colormapName{}'.format(suffix)) + colormap.set_under(underColor) + colormap.set_over(overColor) + return (colormap, colorbarLevels) + + +def _plot_size_y_axis(plt, xaxisValues, **data): + ''' + Resize the y-axis limit based on the curves being plotted + + Parameters + ---------- + plt : plot handle + + xaxisValues : numpy.array + Values plotted along the x-axis + + data : dictionary entries must be numpy.array + data for curves on plot + + Author + ------ + Luke Van Roekel + + Last modified + ------------- + 04/07/2017 + ''' + + ax = plt.gca() + xmin = ax.get_xlim()[0] + xmax = ax.get_xlim()[1] + + # find period/frequency bounds for chosen xmin/xmax + minIndex = np.abs(xaxisValues - xmin).argmin() + maxIndex = np.abs(xaxisValues - xmax).argmin() + + # find maximum value of three curves plotted + maxCurveVal = -1E20 + for key in data: + maxTemp = data[key][minIndex:maxIndex].max() + maxCurveVal = max(maxTemp, maxCurveVal) + + return maxCurveVal + + +def _plot_xtick_format(plt, calendar, minDays, maxDays, maxXTicks): + ''' + Formats tick labels and positions along the x-axis for time series + / index plots + + Parameters + ---------- + plt : plt handle on which to change ticks + + calendar : specified calendar for the plot + + minDays : start time for labels + + maxDays : end time for labels + + Author + ------ + Xylar Asay-Davis + + ''' + ax = plt.gca() + + start = days_to_datetime(np.amin(minDays), calendar=calendar) + end = days_to_datetime(np.amax(maxDays), calendar=calendar) + + if (end.year - start.year > maxXTicks/2): + major = [date_to_days(year=year, calendar=calendar) + for year in np.arange(start.year, end.year+1)] + formatterFun = partial(_date_tick, calendar=calendar, + includeMonth=False) + else: + # add ticks for months + major = [] + for year in range(start.year, end.year+1): + for month in range(1, 13): + major.append(date_to_days(year=year, month=month, + calendar=calendar)) + formatterFun = partial(_date_tick, calendar=calendar, + includeMonth=True) + + ax.xaxis.set_major_locator(FixedLocator(major, maxXTicks)) + ax.xaxis.set_major_formatter(FuncFormatter(formatterFun)) + + plt.setp(ax.get_xticklabels(), rotation=30) + + plt.autoscale(enable=True, axis='x', tight=True) + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git 
a/mpas_analysis/shared/time_series/__init__.py b/mpas_analysis/shared/time_series/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/mpas_analysis/shared/time_series/time_series.py b/mpas_analysis/shared/time_series/time_series.py new file mode 100644 index 000000000..a132419ad --- /dev/null +++ b/mpas_analysis/shared/time_series/time_series.py @@ -0,0 +1,153 @@ +""" +Utility functions related to time-series data sets + +Authors +------- +Xylar Asay-Davis + +Last Modified +------------- +04/08/2017 +""" + +import xarray as xr +import numpy +import os +import warnings + +from ..timekeeping.utility import days_to_datetime + + +def cache_time_series(timesInDataSet, timeSeriesCalcFunction, cacheFileName, + calendar, yearsPerCacheUpdate=1, + printProgress=False): # {{{ + ''' + Create or update a NetCDF file ``cacheFileName`` containing the given time + series, calculated with ``timeSeriesCalcFunction`` over the given times, + start and end year, and time frequency with which results are cached. + + Note: only works with time series where the mask (locations of ``NaN`` + values) doesn't vary with time. + + Parameters + ---------- + timesInDataSet : array-like + Times at which the time series is to be calculated, typically taken + from ``ds.Times.values`` for a data set from which the time series + will be extracted or computed. + + timeSeriesCalcFunction : function + A function with arguments ``timeIndices``, indicating the entries in + ``timesInDataSet`` to be computed, and ``firstCall``, indicating + whether this is the first call to the function (useful for printing + progress information). + + cacheFileName : str + The absolute path to the cache file where the time series will be + stored + + calendar : ``{'gregorian', 'gregorian_noleap'}`` + The name of one of the calendars supported by MPAS cores, used to + determine ``year`` and ``month`` from ``Time`` coordinate + + yearsPerCacheUpdate : int, optional + The frequency with which the cache file is updated as the computation + progresses. If the computation is expensive, it may be useful to + output the file frequently. If not, there will be needless overhead + in caching the file too frequently. + + printProgress: bool, optional + Whether progress messages should be printed as the time series is + computed + + Returns + ------- + timeSeries : ``xarray.Dataset`` + The cached time series, read from or written to ``cacheFileName`` and + restricted to the times in ``timesInDataSet``. + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 04/08/2017 + + ''' + + timesProcessed = numpy.zeros(len(timesInDataSet), bool) + # figure out which files to load and which years go in each file + continueOutput = os.path.exists(cacheFileName) + cacheDataSetExists = False + if continueOutput: + if printProgress: + print ' Read in previously computed time series' + # read in what we have so far + + try: + dsCache = xr.open_dataset(cacheFileName, decode_times=False) + cacheDataSetExists = True + except IOError: + # assuming the cache file is corrupt, so deleting it.
+ message = 'Deleting cache file {}, which appears to have ' \ + 'been corrupted.'.format(cacheFileName) + warnings.warn(message) + os.remove(cacheFileName) + + if cacheDataSetExists: + # force loading and then close so we can overwrite the file later + dsCache.load() + dsCache.close() + for time in dsCache.Time.values: + timesProcessed[timesInDataSet == time] = True + + datetimes = days_to_datetime(timesInDataSet, calendar=calendar) + yearsInDataSet = numpy.array([date.year for date in datetimes]) + + startYear = yearsInDataSet[0] + endYear = yearsInDataSet[-1] + + firstProcessed = True + for firstYear in range(startYear, endYear+1, yearsPerCacheUpdate): + years = range(firstYear, numpy.minimum(endYear+1, + firstYear+yearsPerCacheUpdate)) + + mask = numpy.zeros(len(yearsInDataSet), bool) + for year in years: + mask = numpy.logical_or(mask, yearsInDataSet == year) + mask = numpy.logical_and(mask, numpy.logical_not(timesProcessed)) + + timeIndices = numpy.nonzero(mask)[0] + + if len(timeIndices) == 0: + # no unprocessed time entries in this data range + continue + + if printProgress: + if firstProcessed: + print ' Process and save time series' + if yearsPerCacheUpdate == 1: + print ' {:04d}'.format(years[0]) + else: + print ' {:04d}-{:04d}'.format(years[0], years[-1]) + + ds = timeSeriesCalcFunction(timeIndices, firstProcessed) + firstProcessed = False + + if cacheDataSetExists: + dsCache = xr.concat([dsCache, ds], dim='Time') + # now sort the Time dimension: + dsCache = dsCache.loc[{'Time': sorted(dsCache.Time.values)}] + else: + dsCache = ds + cacheDataSetExists = True + + dsCache.to_netcdf(cacheFileName) + + return dsCache.sel(Time=slice(timesInDataSet[0], timesInDataSet[-1])) + + # }}} + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/timekeeping/Date.py b/mpas_analysis/shared/timekeeping/Date.py deleted file mode 100644 index bd7dab3ac..000000000 --- a/mpas_analysis/shared/timekeeping/Date.py +++ /dev/null @@ -1,285 +0,0 @@ -""" - Module for the Date class used to parse and compare dates and times - - Xylar Asay-Davis - Last modified: 11/02/2016 -""" - -import functools -import numpy -import datetime - -@functools.total_ordering -class Date(object): - """ - Class for representing dates on a 365-day calendar. - Date objects can be created either from a formatted string or - from a number of seconds (mostly intended for internal use). - Date objects can be added to or subtracted from one another and - can be compared with one another. - """ - - # constructor - def __init__(self, dateString=None, isInterval=False, totalSeconds=None, - years=None, months=None, days=None, - hours=None, minutes=None, seconds=None): - """ - creates a new Date object. If the dateString is supplied, it should - have one of the following formats: - YYYY-MM-DD_hh:mm:ss - YYYY-MM-DD_hh.mm.ss - YYYY-MM-DD_SSSSS - DDD_hh:mm:ss - DDD_hh.mm.ss - DDD_SSSSS - hh.mm.ss - hh:mm:ss - YYYY-MM-DD - SSSSS - - isInterval indicates whether the date is an interval (difference - between dates) or a normal (non-interval) date. Intervals mean that - the month and day start with 0, while strings representing non-interval - dates have day and months starting with 1. - - If a dateString is not supplied, totalSeconds can be used to supply - the date as a number of seconds (as a 64-bit integer). - - If neither dateString nor totalSeconds is given, all of years, months, - days, hours, minutes and seconds are required to represent the date. - These argument are intended mostly for internal use. 
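To make the caching workflow in ``cache_time_series`` above concrete, here is a small sketch of a call. The data set, variable names and cache file name are invented for illustration; the only requirements taken from the code above are that the calculation function accepts ``timeIndices`` and ``firstCall`` and returns a data set with a ``Time`` coordinate matching the requested times.

```
# Hypothetical sketch -- the data set and names below are invented
import numpy
import xarray as xr

from mpas_analysis.shared.time_series.time_series import cache_time_series

# pretend 'Time' holds days since 0001-01-01 for two years of monthly output
times = numpy.arange(15., 730., 30.)
ds = xr.Dataset({'sst': (('Time', 'nCells'),
                         numpy.random.rand(len(times), 10))},
                coords={'Time': ('Time', times)})


def compute_mean_sst(timeIndices, firstCall):
    # compute the time series only for the not-yet-cached time indices
    dsSlice = ds.isel(Time=timeIndices)
    return xr.Dataset({'meanSST': dsSlice.sst.mean(dim='nCells')})


dsTimeSeries = cache_time_series(times, compute_mean_sst,
                                 cacheFileName='meanSST_timeSeries.nc',
                                 calendar='gregorian_noleap',
                                 yearsPerCacheUpdate=1,
                                 printProgress=False)
# a second call reuses meanSST_timeSeries.nc and skips already computed years
```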
- """ - - self.isInterval = isInterval - if dateString is not None: - self._parseDate(dateString) - elif totalSeconds is not None: - self._secondsToDate(totalSeconds) - else: - if years is None: - raise ValueError('years must be set') - self.years = numpy.int64(years) - if months is None: - raise ValueError('months must be set') - self.months = numpy.int64(months) - if days is None: - raise ValueError('days must be set') - self.days = numpy.int64(days) - if hours is None: - raise ValueError('hours must be set') - self.hours = numpy.int64(hours) - if minutes is None: - raise ValueError('minutes must be set') - self.minutes = numpy.int64(minutes) - if seconds is None: - raise ValueError('seconds must be set') - self.seconds = numpy.int64(seconds) - self._setTotalSeconds() - - def to_datetime(self, yearOffset=0): - """ - Converts the date object to a datetime object. - The yearOffset is added to this date's year, and - the resulting date is clamped to the range supported by - numpy's datetime64[ns], used internally by xarray an - pandas - - Last modified: 11/28/2016 - Author: Xylar Asay-Davis - """ - if self.isInterval: - raise ValueError("self.isInterval == True. Use to_timedelta " - "instead of to_datetime") - - year = numpy.maximum(datetime.MINYEAR, - numpy.minimum(datetime.MAXYEAR, - self.years+yearOffset)) - outDate = datetime.datetime(year=year, month=self.months+1, - day=self.days+1, hour=self.hours, - minute=self.minutes, second=self.seconds) - - minDate = datetime.datetime(year=1678, month=1, day=1, - hour=0, minute=0, second=0) - maxDate = datetime.datetime(year=2262, month=1, day=1, - hour=0, minute=0, second=0) - outDate = max(minDate, min(maxDate, outDate)) - return outDate - - def to_timedelta(self): - """ - Converts the date object to a timedelta object - - Last modified: 11/28/2016 - Author: Xylar Asay-Davis - """ - if not self.isInterval: - raise ValueError("self.isInterval == False. 
Use to_datetime " - "instead of to_timedelta") - - days = 365*self.years + self._monthsToDays(self.months) + self.days - return datetime.timedelta(days=self.days, hours=self.hours, - minutes=self.minutes, seconds=self.seconds) - - def __lt__(self, other): - if self.isInterval != other.isInterval: - raise ValueError('Comparing interval with non-interval Date ' - 'object') - return self.totalSeconds < other.totalSeconds - - def __eq__(self, other): - if self.isInterval != other.isInterval: - raise ValueError('Comparing interval with non-interval Date ' - 'object') - return self.totalSeconds == other.totalSeconds - - def __add__(self, other): - if self.isInterval: - raise ValueError('Attempting to add to an interval Date object') - if not other.isInterval: - raise ValueError('Attempting to add a non-interval Date object') - - seconds = self.seconds + other.seconds - minutes = self.minutes + other.minutes + seconds/60 - seconds %= 60 - hours = self.hours + other.hours + minutes/60 - minutes %= 60 - months = self.months + other.months - years = self.years + other.years + months/12 - months %= 12 - days = (self._monthsToDays(months) + self.days + other.days + hours/24) - years += days/365 - days %= 365 - (months, days) = self._daysToMonthsAndDays(days) - return Date(isInterval=False, years=years, months=months, days=days, - hours=hours, minutes=minutes, seconds=seconds) - - def __sub__(self, other): - if self.isInterval: - raise ValueError('Attempting to subtract from an interval Date ' - 'object') - - isInterval = not other.isInterval - seconds = self.seconds - other.seconds - minutes = self.minutes - other.minutes + seconds/60 - seconds %= 60 - hours = self.hours - other.hours + minutes/60 - minutes %= 60 - months = self.months - other.months - years = self.years - other.years + months/12 - months %= 12 - days = (self._monthsToDays(months) + self.days - other.days + hours/24) - years += days/365 - days %= 365 - (months, days) = self._daysToMonthsAndDays(days) - - return Date(isInterval=isInterval, years=years, months=months, - days=days, hours=hours, minutes=minutes, seconds=seconds) - - def __str__(self): - if self.isInterval: - offset = 0 - else: - offset = 1 - return '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}'.format( - self.years, self.months+offset, self.days+offset, - self.hours, self.minutes, self.seconds) - - def _diffSeconds(self, other): - return - - def _setTotalSeconds(self): - days = self.years*365 + self._monthsToDays(self.months) + self.days - self.totalSeconds = (((days*24 + self.hours)*60 + self.minutes)*60 + - self.seconds) - - def _monthsToDays(self, months): - daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - days = numpy.int64(0) - for month in range(months): - days += daysInMonth[month] - return days - - def _daysToMonthsAndDays(self, days): - daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - assert(days < 365) - months = numpy.int64(0) - while days > daysInMonth[months]: - days -= daysInMonth[months] - months += 1 - days = numpy.int64(days) - return (months, days) - - def _secondsToDate(self, seconds): - self.totalSeconds = seconds - self.years = numpy.int64(seconds / 31536000) - seconds %= 31536000 - days = numpy.int64(seconds / 86400) - (self.months, self.days) = self._daysToMonthsAndDays(days) - seconds %= 86400 - self.hours = numpy.int64(seconds / 3600) - seconds %= 3600 - self.minutes = numpy.int64(seconds / 60) - seconds %= 60 - self.seconds = seconds - - def _parseDate(self, dateString): - """ - parses a dateString in one of the 
following formats into - a Date object: - YYYY-MM-DD_hh:mm:ss - YYYY-MM-DD_hh.mm.ss - YYYY-MM-DD_SSSSS - DDD_hh:mm:ss - DDD_hh.mm.ss - DDD_SSSSS - hh.mm.ss - hh:mm:ss - YYYY-MM-DD - YYYY-MM - SSSSS - """ - if self.isInterval: - offset = numpy.int64(0) - else: - offset = numpy.int64(1) - - if '_' in dateString: - ymd, hms = dateString.split('_') - else: - if '-' in dateString: - ymd = dateString - # error can result if dateString = '1990-01' - # assume this means '1990-01-01' - if len(ymd.split('-')) == 2: - ymd += '-01' - hms = '00:00:00' - else: - if self.isInterval: - ymd = '0000-00-00' - else: - ymd = '0000-01-01' - hms = dateString - - if '.' in hms: - hms = hms.replace('.', ':') - - if '-' in ymd: - (self.years, self.months, self.days) \ - = [numpy.int64(sub) for sub in ymd.split('-')] - self.months -= offset - self.days -= offset - else: - self.days = numpy.int64(ymd) - offset - self.years = numpy.int64(0) - self.months = numpy.int64(0) - - if ':' in hms: - (self.hours, self.minutes, self.seconds) \ - = [numpy.int64(sub) for sub in hms.split(':')] - else: - self.seconds = numpy.int64(hms) - self.minutes = numpy.int64(0) - self.hours = numpy.int64(0) - self._setTotalSeconds() - -# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/timekeeping/MpasRelativeDelta.py b/mpas_analysis/shared/timekeeping/MpasRelativeDelta.py new file mode 100644 index 000000000..295b8e50e --- /dev/null +++ b/mpas_analysis/shared/timekeeping/MpasRelativeDelta.py @@ -0,0 +1,155 @@ +import datetime +from dateutil.relativedelta import relativedelta +from calendar import monthrange, isleap + + +class MpasRelativeDelta(relativedelta): + """ + MpasRelativeDelta is a subclass of dateutil.relativedelta for relative time + intervals with different MPAS calendars. + + Only relative intervals (years, months, etc.) are supported and not the + absolute date specifications (year, month, etc.). Addition/subtraction + of datetime.datetime objects or other MpasRelativeDelta (but currently not + datetime.date, datetime.timedelta or other related objects) is supported. 
+ + Author + ------ + Xylar Asay-Davis + + Last Modified + ------------- + 02/09/2017 + """ + + def __init__(self, dt1=None, dt2=None, years=0, months=0, days=0, + hours=0, minutes=0, seconds=0, calendar='gregorian'): + if calendar not in ['gregorian', 'gregorian_noleap']: + raise ValueError('Unsupported MPAs calendar {}'.format(calendar)) + self.calendar = calendar + super(MpasRelativeDelta, self).__init__(dt1=dt1, dt2=dt2, years=years, + months=months, days=days, + hours=hours, minutes=minutes, + seconds=seconds) + + def __add__(self, other): + if not isinstance(other, (datetime.datetime, MpasRelativeDelta)): + return NotImplemented + + if isinstance(other, MpasRelativeDelta): + if self.calendar != other.calendar: + raise ValueError('MpasRelativeDelta objects can only be added ' + 'if their calendars match.') + years = self.years + other.years + months = self.months + other.months + if months > 12: + years += 1 + months -= 12 + elif months < 1: + years -= 1 + months += 12 + + return self.__class__(years=years, + months=months, + days=self.days + other.days, + hours=self.hours + other.hours, + minutes=self.minutes + other.minutes, + seconds=self.seconds + other.seconds, + calendar=self.calendar) + + year = other.year+self.years + + month = other.month + if self.months != 0: + assert 1 <= abs(self.months) <= 12 + month += self.months + if month > 12: + year += 1 + month -= 12 + elif month < 1: + year -= 1 + month += 12 + + if self.calendar == 'gregorian': + daysInMonth = monthrange(year, month)[1] + elif self.calendar == 'gregorian_noleap': + # use year 0001, which is not a leapyear + daysInMonth = monthrange(1, month)[1] + + day = min(daysInMonth, other.day) + repl = {"year": year, "month": month, "day": day} + + days = self.days + if self.calendar == 'gregorian_noleap' and isleap(year): + if month == 2 and day+days >= 29: + # skip forward over the leap day + days += 1 + elif month == 3 and day+days <= 0: + # skip backward over the leap day + days -= 1 + + return (other.replace(**repl) + + datetime.timedelta(days=days, + hours=self.hours, + minutes=self.minutes, + seconds=self.seconds)) + + def __radd__(self, other): + return self.__add__(other) + + def __rsub__(self, other): + return self.__neg__().__add__(other) + + def __sub__(self, other): + if not isinstance(other, MpasRelativeDelta): + return NotImplemented + return self.__add__(other.__neg__()) + + def __neg__(self): + return self.__class__(years=-self.years, + months=-self.months, + days=-self.days, + hours=-self.hours, + minutes=-self.minutes, + seconds=-self.seconds, + calendar=self.calendar) + + def __mul__(self, other): + try: + f = float(other) + except TypeError: + return NotImplemented + + return self.__class__(years=int(self.years * f), + months=int(self.months * f), + days=int(self.days * f), + hours=int(self.hours * f), + minutes=int(self.minutes * f), + seconds=int(self.seconds * f), + calendar=self.calendar) + + __rmul__ = __mul__ + + def __div__(self, other): + try: + reciprocal = 1 / float(other) + except TypeError: + return NotImplemented + + return self.__mul__(reciprocal) + + __truediv__ = __div__ + + def __repr__(self): + outList = [] + for attr in ["years", "months", "days", "leapdays", + "hours", "minutes", "seconds", "microseconds"]: + value = getattr(self, attr) + if value: + outList.append("{attr}={value:+g}".format(attr=attr, + value=value)) + outList.append("calendar='{}'".format(self.calendar)) + return "{classname}({attrs})".format(classname=self.__class__.__name__, + attrs=", ".join(outList)) + +# vim: 
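The calendar-aware arithmetic implemented by ``MpasRelativeDelta`` above can be illustrated with a short, self-contained example (the dates are arbitrary):

```
import datetime
from mpas_analysis.shared.timekeeping.MpasRelativeDelta import MpasRelativeDelta

jan31 = datetime.datetime(2000, 1, 31)

# on the standard Gregorian calendar, 2000 is a leap year
print jan31 + MpasRelativeDelta(months=1, calendar='gregorian')
# -> 2000-02-29 00:00:00

# on the no-leap calendar the day is clamped to 28 and Feb. 29 is skipped
print jan31 + MpasRelativeDelta(months=1, calendar='gregorian_noleap')
# -> 2000-02-28 00:00:00

# adding two deltas requires matching calendars; months are normalized
delta = (MpasRelativeDelta(months=11, calendar='gregorian_noleap') +
         MpasRelativeDelta(months=2, calendar='gregorian_noleap'))
print delta
# -> MpasRelativeDelta(years=+1, months=+1, calendar='gregorian_noleap')
```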
foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/timekeeping/utility.py b/mpas_analysis/shared/timekeeping/utility.py new file mode 100644 index 000000000..2f3c4d08c --- /dev/null +++ b/mpas_analysis/shared/timekeeping/utility.py @@ -0,0 +1,513 @@ +""" +Time keeping utility functions + +Author +------ +Xylar Asay-Davis + +Last Modified +------------- +02/11/2017 +""" + +import datetime +import netCDF4 +import numpy + +from .MpasRelativeDelta import MpasRelativeDelta + + +def get_simulation_start_time(streams): + """ + Given a StreamsFile object, returns the simulation start time parsed from + a restart file. + + Parameters + ---------- + steams : StreamsFile object + For parsing an MPAS streams file + + Returns + ------- + simulation_start_time : string + The start date of the simulation parsed from a restart file identified + by the contents of `streams`. + + Raises + ------ + IOError + If no restart file can be found. + + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/11/2017 + """ + + try: + restartFile = streams.readpath('restart')[0] + except ValueError: + raise IOError('No MPAS restart file found: need at least one ' + 'restart file for analysis to work correctly') + + ncFile = netCDF4.Dataset(restartFile, mode='r') + simulationStartTime = ncFile.variables['simulationStartTime'][:] + # convert from character array to str + simulationStartTime = ''.join(simulationStartTime).strip() + # replace underscores so it works as a CF-compliant reference date + simulationStartTime = simulationStartTime.replace('_', ' ') + ncFile.close() + + return simulationStartTime + + +def string_to_datetime(dateString): # {{{ + """ + Given a date string and a calendar, returns a `datetime.datetime` + + Parameters + ---------- + dateString : string + A date and time in one of the following formats: + - YYYY-MM-DD hh:mm:ss + - YYYY-MM-DD hh.mm.ss + - YYYY-MM-DD SSSSS + - DDD hh:mm:ss + - DDD hh.mm.ss + - DDD SSSSS + - hh.mm.ss + - hh:mm:ss + - YYYY-MM-DD + - YYYY-MM + - SSSSS + + Note: either underscores or spaces can be used to separate the date + from the time portion of the string. + + Returns + ------- + datetime : A `datetime.datetime` object + + Raises + ------ + ValueError + If an invalid `dateString` is supplied. + + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/04/2017 + """ + + (year, month, day, hour, minute, second) = \ + _parse_date_string(dateString, isInterval=False) + + return datetime.datetime(year=year, month=month, day=day, hour=hour, + minute=minute, second=second) # }}} + + +def string_to_relative_delta(dateString, calendar='gregorian'): # {{{ + """ + Given a date string and a calendar, returns an instance of + `MpasRelativeDelta` + + Parameters + ---------- + dateString : string + A date and time in one of the following formats: + - YYYY-MM-DD hh:mm:ss + - YYYY-MM-DD hh.mm.ss + - YYYY-MM-DD SSSSS + - DDD hh:mm:ss + - DDD hh.mm.ss + - DDD SSSSS + - hh.mm.ss + - hh:mm:ss + - YYYY-MM-DD + - YYYY-MM + - SSSSS + + Note: either underscores or spaces can be used to separate the date + from the time portion of the string. + + calendar: {'gregorian', 'gregorian_noleap'}, optional + The name of one of the calendars supported by MPAS cores + + Returns + ------- + relativedelta : An `MpasRelativeDelta` object + + Raises + ------ + ValueError + If an invalid `dateString` is supplied. 
+ + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/04/2017 + """ + + (years, months, days, hours, minutes, seconds) = \ + _parse_date_string(dateString, isInterval=True) + + return MpasRelativeDelta(years=years, months=months, days=days, + hours=hours, minutes=minutes, seconds=seconds, + calendar=calendar) + # }}} + + +def string_to_days_since_date(dateString, calendar='gregorian', + referenceDate='0001-01-01'): + """ + Given a date string or an array-like of date strings, a reference date + string, and a calendar, returns the number of days (as a float or + numpy.array of floats) since the reference date + + Parameters + ---------- + dateStrings : str or array-like of str + A date and time (or array of date/times) in one of the following + formats: + - YYYY-MM-DD hh:mm:ss + - YYYY-MM-DD hh.mm.ss + - YYYY-MM-DD SSSSS + - DDD hh:mm:ss + - DDD hh.mm.ss + - DDD SSSSS + - hh.mm.ss + - hh:mm:ss + - YYYY-MM-DD + - YYYY-MM + - SSSSS + + Note: either underscores or spaces can be used to separate the date + from the time portion of the string. + + calendar: {'gregorian', 'gregorian_noleap'}, optional + The name of one of the calendars supported by MPAS cores + + referenceDate : str, optional + A reference date of the form: + - 0001-01-01 + - 0001-01-01 00:00:00 + + Returns + ------- + days : float or numpy.array of floats + The number of days since `referenceDate` for each date in dateString + + Raises + ------ + ValueError + If an invalid `dateString` or `calendar` is supplied. + + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/04/2017 + """ + + isSingleString = isinstance(dateString, str) + + if isSingleString: + dateString = [dateString] + + dates = [string_to_datetime(string) for string in dateString] + days = datetime_to_days(dates, calendar=calendar, + referenceDate=referenceDate) + + if isSingleString: + days = days[0] + else: + days = numpy.array(days) + return days + + +def days_to_datetime(days, calendar='gregorian', referenceDate='0001-01-01'): + """ + Covert days to `datetime.datetime` objects given a reference date and an + MPAS calendar (either 'gregorian' or 'gregorian_noleap'). + + Parameters + ---------- + days : float or array-like of floats + The number of days since the reference date. + + calendar : {'gregorian', 'gregorian_noleap'}, optinal + A calendar to be used to convert days to a `datetime.datetime` object. + + referenceDate : str, optional + A reference date of the form: + - 0001-01-01 + - 0001-01-01 00:00:00 + + Returns + ------- + datetime : An instance of `datetime.datetime` (or array-like of datetimes) + The days since `referenceDate` on the given `calendar`. + + Raises + ------ + ValueError + If an invalid `days`, `referenceDate` or `calendar` is supplied. 
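A few concrete examples of the date-string parsing provided by ``string_to_datetime``, ``string_to_relative_delta`` and ``string_to_days_since_date`` above (the values are chosen only for illustration):

```
from mpas_analysis.shared.timekeeping.utility import (string_to_datetime,
                                                      string_to_relative_delta,
                                                      string_to_days_since_date)

# either underscores or spaces may separate the date and time portions
print string_to_datetime('0002-03-15_12:30:00')  # 0002-03-15 12:30:00

# 'YYYY-MM' is padded to the first of the month
print string_to_datetime('0002-03')              # 0002-03-01 00:00:00

# interval strings become MpasRelativeDelta objects (here, exactly one year)
print string_to_relative_delta('0001-00-00', calendar='gregorian_noleap')

# days since the default reference date 0001-01-01
print string_to_days_since_date('0001-01-02', calendar='gregorian_noleap')  # 1.0
```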
+ + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/04/2017 + """ + + datetimes = netCDF4.num2date(days, + 'days since {}'.format(referenceDate), + calendar=_mpas_to_netcdf_calendar(calendar)) + + # convert to datetime.datetime + if isinstance(datetimes, numpy.ndarray): + newDateTimes = [] + for date in datetimes.flat: + newDateTimes.append(_round_datetime(date)) + if len(newDateTimes) > 0: + datetimes = numpy.reshape(numpy.array(newDateTimes), + datetimes.shape) + + else: + datetimes = _round_datetime(datetimes) + + return datetimes + + +def datetime_to_days(dates, calendar='gregorian', referenceDate='0001-01-01'): + """ + Given date(s), a calendar and a reference date, returns the days since + the reference date, either as a single float or an array of floats. + + Parameters + ---------- + datetime : instance or array-like of datetime.datetime + The date(s) to be converted to days since `referenceDate` on the + given `calendar`. + + calendar : {'gregorian', 'gregorian_noleap'}, optional + A calendar to be used to convert days to a `datetime.datetime` object. + + referenceDate : str, optional + A reference date of the form: + - 0001-01-01 + - 0001-01-01 00:00:00 + + Returns + ------- + days : float or array of floats + The days since `referenceDate` on the given `calendar`. + + Raises + ------ + ValueError + If an invalid `datetimes`, `referenceDate` or `calendar` is supplied. + + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/11/2017 + """ + + isSingleDate = False + if isinstance(dates, datetime.datetime): + dates = [dates] + isSingleDate = True + + days = netCDF4.date2num(dates, 'days since {}'.format(referenceDate), + calendar=_mpas_to_netcdf_calendar(calendar)) + + if isSingleDate: + days = days[0] + + return days + + +def date_to_days(year=1, month=1, day=1, hour=0, minute=0, second=0, + calendar='gregorian', referenceDate='0001-01-01'): + """ + Given a date in the form of year, month, day, etc.; a calendar; and a + reference date, returns the days since the reference date. + + Parameters + ---------- + year, month, day, hour, minute, second : int, optional + The date to be converted to days since `referenceDate` on the + given `calendar`. + + calendar : {'gregorian', 'gregorian_noleap'}, optional + A calendar to be used to convert days to a `datetime.datetime` object. + + referenceDate : str, optional + A reference date of the form: + - 0001-01-01 + - 0001-01-01 00:00:00 + + Returns + ------- + days : float + The days since `referenceDate` on the given `calendar`. + + Raises + ------ + ValueError + If an invalid `referenceDate` or `calendar` is supplied. 
+ + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/11/2017 + """ + + calendar = _mpas_to_netcdf_calendar(calendar) + + date = datetime.datetime(year, month, day, hour, minute, second) + + return netCDF4.date2num(date, 'days since {}'.format(referenceDate), + calendar=calendar) + + +def _parse_date_string(dateString, isInterval=False): # {{{ + """ + Given a string containing a date, returns a tuple defining a date of the + form (year, month, day, hour, minute, second) appropriate for constructing + a datetime or timedelta + + Parameters + ---------- + dateString : string + A date and time in one of the followingformats: + - YYYY-MM-DD hh:mm:ss + - YYYY-MM-DD hh.mm.ss + - YYYY-MM-DD SSSSS + - DDD hh:mm:ss + - DDD hh.mm.ss + - DDD SSSSS + - hh.mm.ss + - hh:mm:ss + - YYYY-MM-DD + - YYYY-MM + - SSSSS + + Note: either underscores or spaces can be used to separate the date + from the time portion of the string. + + isInterval : bool, optional + If ``isInterval=True``, the result is appropriate for constructing + a `datetime.timedelta` object rather than a `datetime`. + + Returns + ------- + date : A tuple of (year, month, day, hour, minute, second) + + Raises + ------ + ValueError + If an invalid `dateString` is supplied. + + Author + ------ + Xylar Asay-Davis + + Last modified + ------------- + 02/04/2017 + """ + if isInterval: + offset = 0 + else: + offset = 1 + + # change underscores to spaces so both can be supported + dateString = dateString.replace('_', ' ') + if ' ' in dateString: + ymd, hms = dateString.split(' ') + else: + if '-' in dateString: + ymd = dateString + # error can result if dateString = '1990-01' + # assume this means '1990-01-01' + if len(ymd.split('-')) == 2: + ymd += '-01' + hms = '00:00:00' + else: + if isInterval: + ymd = '0000-00-00' + else: + ymd = '0001-01-01' + hms = dateString + + if '.' in hms: + hms = hms.replace('.', ':') + + if '-' in ymd: + (year, month, day) \ + = [int(sub) for sub in ymd.split('-')] + else: + day = int(ymd) + year = 0 + month = offset + + if ':' in hms: + (hour, minute, second) \ + = [int(sub) for sub in hms.split(':')] + else: + second = int(hms) + minute = 0 + hour = 0 + return (year, month, day, hour, minute, second) # }}} + + +def _mpas_to_netcdf_calendar(calendar): + """ + Convert from MPAS calendar to NetCDF4 calendar names. + """ + + if calendar == 'gregorian_noleap': + calendar = 'noleap' + elif calendar != 'gregorian': + raise ValueError('Unsupported calendar {}'.format(calendar)) + return calendar + + +def _round_datetime(date): + """Round a datetime object to nearest second + date : datetime.datetime or similar objet object. 
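``date_to_days``, ``datetime_to_days`` and ``days_to_datetime`` are inverse operations for a given calendar and reference date; a brief sketch with illustrative values:

```
import datetime

from mpas_analysis.shared.timekeeping.utility import (date_to_days,
                                                      days_to_datetime,
                                                      datetime_to_days)

# 31 January of year 2 is 395 days after the reference date 0001-01-01
# on the no-leap calendar (365 + 30)
days = date_to_days(year=2, month=1, day=31, calendar='gregorian_noleap')
print days                                                  # 395.0

# converting back recovers the date
print days_to_datetime(days, calendar='gregorian_noleap')   # 0002-01-31 00:00:00

# datetime_to_days accepts a single datetime or a list of them
print datetime_to_days([datetime.datetime(1, 1, 1),
                        datetime.datetime(1, 2, 1)],
                       calendar='gregorian_noleap')
# 0 and 31 days since the reference date
```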
+ """ + (year, month, day, hour, minute, second, microsecond) = \ + (date.year, date.month, date.day, date.hour, date.minute, date.second, + date.microsecond) + + date = datetime.datetime(year=year, month=month, day=day, + hour=hour, minute=minute, + second=second) + + add_seconds = int(1e-6*microsecond+0.5) + + return date + datetime.timedelta(0, add_seconds) + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/__init__.py b/mpas_analysis/test/__init__.py index ce160cdc8..8580ad7a5 100644 --- a/mpas_analysis/test/__init__.py +++ b/mpas_analysis/test/__init__.py @@ -29,12 +29,15 @@ except ImportError: has_numpy = False + def requires_lxml(test): return test if has_lxml else unittest.skip('requires lxml')(test) + def requires_numpy(test): return test if has_numpy else unittest.skip('requires numpy')(test) + # Adapted from # http://stackoverflow.com/questions/29627341/pytest-where-to-store-expected-data @fixture @@ -58,19 +61,23 @@ def assertEqual(self, a1, a2): assert a1 == a2 or (a1 != a1 and a2 != a2) def assertLessThan(self, a1, a2): - assert a1 <= a2 - + assert a1 <= a2 + def assertGreaterThan(self, a1, a2): assert a1 >= a2 - + @requires_numpy def assertArrayEqual(self, a1, a2): np.testing.assert_array_equal(a1, a2) - + @requires_numpy def assertApproxEqual(self, a1, a2, rtol=1e-5, atol=1e-8): assert np.isclose(a1, a2, rtol=rtol, atol=atol) + @requires_numpy + def assertArrayApproxEqual(self, a1, a2, rtol=1e-5, atol=1e-8): + assert np.all(np.isclose(a1, a2, rtol=rtol, atol=atol)) + @contextmanager def assertWarns(self, message): with warnings.catch_warnings(record=True) as w: diff --git a/mpas_analysis/test/test_climatology.py b/mpas_analysis/test/test_climatology.py new file mode 100644 index 000000000..925380873 --- /dev/null +++ b/mpas_analysis/test/test_climatology.py @@ -0,0 +1,394 @@ +""" +Unit test infrastructure for climatologies. 
+ +Xylar Asay-Davis +04/11/2017 +""" + +import pytest +import tempfile +import shutil +import os +import numpy +import xarray + +from mpas_analysis.test import TestCase, loaddatadir +from mpas_analysis.shared.generalized_reader.generalized_reader \ + import open_multifile_dataset +from mpas_analysis.configuration.MpasAnalysisConfigParser \ + import MpasAnalysisConfigParser +from mpas_analysis.shared.climatology import climatology +from mpas_analysis.shared.constants import constants + + +@pytest.mark.usefixtures("loaddatadir") +class TestClimatology(TestCase): + + def setUp(self): + # Create a temporary directory + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + # Remove the directory after the test + shutil.rmtree(self.test_dir) + + def setup_config(self, autocloseFileLimitFraction=0.5, + maxChunkSize=10000): + config = MpasAnalysisConfigParser() + config.add_section('input') + config.set('input', 'autocloseFileLimitFraction', + str(autocloseFileLimitFraction)) + config.set('input', 'maxChunkSize', str(maxChunkSize)) + config.set('input', 'mpasMeshName', 'QU240') + + config.add_section('output') + config.set('output', 'baseDirectory', self.test_dir) + config.set('output', 'mappingSubdirectory', '.') + config.set('output', 'mpasClimatologySubdirectory', 'clim/mpas') + config.set('output', 'mpasRegriddedClimSubdirectory', + 'clim/mpas/regrid') + + config.add_section('climatology') + config.set('climatology', 'startYear', '2') + config.set('climatology', 'endYear', '2') + config.set('climatology', 'comparisonLatResolution', '0.5') + config.set('climatology', 'comparisonLonResolution', '0.5') + + config.set('climatology', 'overwriteMapping', 'False') + config.set('climatology', 'overwriteMpasClimatology', 'False') + config.set('climatology', 'mpasInterpolationMethod', 'bilinear') + + config.add_section('oceanObservations') + config.set('oceanObservations', 'interpolationMethod', 'bilinear') + config.set('oceanObservations', 'climatologySubdirectory', 'clim/obs') + config.set('oceanObservations', 'regriddedClimSubdirectory', + 'clim/obs/regrid') + + return config + + def test_write_mpas_mapping_file(self): + config = self.setup_config() + mpasMeshFileName = '{}/mpasMesh.nc'.format(self.datadir) + climatology.write_mpas_mapping_file(config, mpasMeshFileName) + + mappingFileName = '{}/map_QU240_to_0.5x0.5degree_' \ + 'bilinear.nc'.format(self.test_dir) + assert os.path.exists(mappingFileName) + + mappingFileName = '{}/mapping.nc'.format(self.test_dir) + config.set('climatology', 'mpasMappingFile', mappingFileName) + climatology.write_mpas_mapping_file(config, mpasMeshFileName) + assert os.path.exists(mappingFileName) + + def test_write_observations_mapping_file(self): + config = self.setup_config() + gridFileName = '{}/obsGrid.nc'.format(self.datadir) + componentName = 'ocean' + fieldName = 'sst' + climatology.write_observations_mapping_file(config, + componentName, + fieldName, + gridFileName, + latVarName='lat', + lonVarName='lon') + + mappingFileName = '{}/map_obs_{}_1.0x1.0degree_to_0.5x0.5degree_' \ + 'bilinear.nc'.format(self.test_dir, fieldName) + assert os.path.exists(mappingFileName) + + mappingFileName = '{}/mapping.nc'.format(self.test_dir) + config.set('oceanObservations', 'sstClimatologyMappingFile', + mappingFileName) + climatology.write_observations_mapping_file(config, + componentName, + fieldName, + gridFileName, + latVarName='lat', + lonVarName='lon') + assert os.path.exists(mappingFileName) + + def test_get_mpas_climatology_file_names(self): + config = 
self.setup_config() + fieldName = 'sst' + monthNames = 'JFM' + (climatologyFileName, climatologyPrefix, regriddedFileName) = \ + climatology.get_mpas_climatology_file_names(config, fieldName, + monthNames) + expectedClimatologyFileName = '{}/clim/mpas/sst_QU240_JFM_' \ + 'years0002-0002.nc'.format(self.test_dir) + self.assertEqual(climatologyFileName, expectedClimatologyFileName) + + expectedClimatologyPrefix = '{}/clim/mpas/sst_QU240_' \ + 'JFM'.format(self.test_dir) + self.assertEqual(climatologyPrefix, expectedClimatologyPrefix) + + expectedRegriddedFileName = '{}/clim/mpas/regrid/sst_QU240_to_' \ + '0.5x0.5degree_JFM_' \ + 'years0002-0002.nc'.format(self.test_dir) + self.assertEqual(regriddedFileName, expectedRegriddedFileName) + + def test_get_observation_climatology_file_names(self): + config = self.setup_config() + fieldName = 'sst' + monthNames = 'JFM' + gridFileName = '{}/obsGrid.nc'.format(self.datadir) + componentName = 'ocean' + (climatologyFileName, regriddedFileName) = \ + climatology.get_observation_climatology_file_names( + config, fieldName, monthNames, componentName, gridFileName, + latVarName='lat', lonVarName='lon') + expectedClimatologyFileName = '{}/clim/obs/sst_1.0x1.0degree_' \ + 'JFM.nc'.format(self.test_dir) + self.assertEqual(climatologyFileName, expectedClimatologyFileName) + + expectedRegriddedFileName = '{}/clim/obs/regrid/sst_1.0x1.0degree_' \ + 'to_0.5x0.5degree_' \ + 'JFM.nc'.format(self.test_dir) + self.assertEqual(regriddedFileName, expectedRegriddedFileName) + + def open_test_ds(self, config, calendar): + fileNames = ['{}/timeSeries.0002-{:02d}-01.nc'.format(self.datadir, + month) + for month in [1, 2, 3]] + + variableMap = {'mld': ['timeMonthly_avg_tThreshMLD'], + 'Time': [['xtime_startMonthly', 'xtime_endMonthly']]} + variableList = ['mld'] + + ds = open_multifile_dataset( + fileNames=fileNames, + calendar=calendar, + config=config, + timeVariableName='Time', + variableList=variableList, + variableMap=variableMap) + + assert(len(ds.Time) == 3) + return ds + + def test_compute_climatology(self): + config = self.setup_config() + calendar = 'gregorian_noleap' + ds = self.open_test_ds(config, calendar) + + assert('month' not in ds.coords.keys()) + assert('daysInMonth' not in ds.coords.keys()) + + # test add_months_and_days_in_month + ds = climatology.add_years_months_days_in_month(ds, calendar) + + self.assertArrayEqual(ds.month.values, [1, 2, 3]) + self.assertArrayEqual(numpy.round(ds.daysInMonth.values), [31, 28, 31]) + + # test compute_climatology on a data set + monthNames = 'JFM' + monthValues = constants.monthDictionary[monthNames] + dsClimatology = climatology.compute_climatology(ds, monthValues, + calendar) + + assert('Time' not in dsClimatology.dims.keys()) + + self.assertEqual(dsClimatology.data_vars.keys(), ['mld']) + + climFileName = '{}/refSeasonalClim.nc'.format(self.datadir) + refClimatology = xarray.open_dataset(climFileName) + self.assertArrayApproxEqual(dsClimatology.mld.values, + refClimatology.mld.values) + + # test compute_climatology on a data array + mldClimatology = climatology.compute_climatology(ds.mld, monthValues, + calendar) + + assert('Time' not in mldClimatology.dims) + + self.assertArrayApproxEqual(dsClimatology.mld.values, + mldClimatology.values) + + # for good measure... 
+ self.assertArrayApproxEqual(mldClimatology.values, + refClimatology.mld.values) + + def test_compute_monthly_climatology(self): + config = self.setup_config() + calendar = 'gregorian_noleap' + ds = self.open_test_ds(config, calendar) + + monthlyClimatology = climatology.compute_monthly_climatology(ds, + calendar) + + assert(len(monthlyClimatology.month) == 3) + + self.assertEqual(monthlyClimatology.data_vars.keys(), ['mld']) + + climFileName = '{}/refMonthlyClim.nc'.format(self.datadir) + refClimatology = xarray.open_dataset(climFileName) + + self.assertArrayApproxEqual(monthlyClimatology.mld.values, + refClimatology.mld.values) + + self.assertArrayApproxEqual(monthlyClimatology.month.values, + refClimatology.month.values) + + def test_update_start_end_year(self): + config = self.setup_config() + calendar = 'gregorian_noleap' + ds = self.open_test_ds(config, calendar) + + changed, startYear, endYear = \ + climatology.update_start_end_year(ds, config, calendar) + + assert(not changed) + assert(startYear == 2) + assert(endYear == 2) + + config.set('climatology', 'endYear', '50') + ds = self.open_test_ds(config, calendar) + + with self.assertWarns('climatology start and/or end year different ' + 'from requested'): + changed, startYear, endYear = \ + climatology.update_start_end_year(ds, config, calendar) + + assert(changed) + assert(startYear == 2) + assert(endYear == 2) + + def test_cache_climatologies(self): + config = self.setup_config() + calendar = 'gregorian_noleap' + ds = self.open_test_ds(config, calendar) + fieldName = 'mld' + climFileName = '{}/refSeasonalClim.nc'.format(self.datadir) + refClimatology = xarray.open_dataset(climFileName) + + tests = [] + # test1: Just January, 1-year climatologies are cached; only one file + # is produced with suffix year0002; a second run of + # cache_climatologies doesn't modify any files + test1 = {'monthNames': 'Jan', + 'monthValues': [1], + 'yearsPerCacheFile': 1, + 'expectedSuffixes': ['year0002'], + 'expectedModified': [False], + # weird value because first time step of Jan. missing in ds + 'expectedDays': 30.958333, + 'expectedMonths': 1, + 'refClimatology': None} + tests.append(test1) + # same as test1 but with JFM + test2 = {'monthNames': 'JFM', + 'monthValues': constants.monthDictionary['JFM'], + 'yearsPerCacheFile': 1, + 'expectedSuffixes': ['year0002'], + 'expectedModified': [False], + # weird value because first time step of Jan. missing in ds + 'expectedDays': 89.958333, + 'expectedMonths': 3, + 'refClimatology': refClimatology} + tests.append(test2) + # test3: 2-year climatologies are cached; 2 files are produced + # with suffix years0002-0003 (the "individual" climatology + # file) and year0002 (the "aggregated" climatology file); + # a second tries to update the "individual" cache file + # because it appears to be incomplete but does not attempt + # to update the aggregated climatology file because no + # additional years were processed and the file was already + # complete for the span of years present + test2 = (2, ['years0002-0003', 'year0002'], [True, False]) + test3 = {'monthNames': 'Jan', + 'monthValues': [1], + 'yearsPerCacheFile': 2, + 'expectedSuffixes': ['years0002-0003', 'year0002'], + 'expectedModified': [True, False], + # weird value because first time step of Jan. 
missing in ds + 'expectedDays': 30.958333, + 'expectedMonths': 1, + 'refClimatology': None} + tests.append(test3) + # test4: same as test3 but with JFM + test4 = {'monthNames': 'JFM', + 'monthValues': constants.monthDictionary['JFM'], + 'yearsPerCacheFile': 2, + 'expectedSuffixes': ['years0002-0003', 'year0002'], + 'expectedModified': [True, False], + # weird value because first time step of Jan. missing in ds + 'expectedDays': 89.958333, + 'expectedMonths': 3, + 'refClimatology': refClimatology} + tests.append(test4) + + for test in tests: + monthNames = test['monthNames'] + monthValues = test['monthValues'] + yearsPerCacheFile = test['yearsPerCacheFile'] + expectedSuffixes = test['expectedSuffixes'] + expectedModified = test['expectedModified'] + expectedDays = test['expectedDays'] + expectedMonths = test['expectedMonths'] + refClimatology = test['refClimatology'] + + (climatologyFileName, climatologyPrefix, regriddedFileName) = \ + climatology.get_mpas_climatology_file_names(config, fieldName, + monthNames) + + config.set('climatology', 'yearsPerCacheFile', + str(yearsPerCacheFile)) + # once without cache files + dsClimatology = climatology.cache_climatologies( + ds, monthValues, config, climatologyPrefix, calendar, + printProgress=True) + if refClimatology is not None: + self.assertArrayApproxEqual(dsClimatology.mld.values, + refClimatology.mld.values) + + self.assertEqual(dsClimatology.attrs['totalMonths'], + expectedMonths) + self.assertApproxEqual(dsClimatology.attrs['totalDays'], + expectedDays) + dsClimatology.close() + + datesModfied = [] + for suffix in expectedSuffixes: + expectedClimatologyFileName = '{}/clim/mpas/mld_QU240_' \ + '{}_{}.nc'.format( + self.test_dir, monthNames, + suffix) + assert os.path.exists(expectedClimatologyFileName) + + datesModfied.append(os.path.getmtime( + expectedClimatologyFileName)) + + # try it again with cache files saved + dsClimatology = climatology.cache_climatologies( + ds, monthValues, config, climatologyPrefix, calendar, + printProgress=True) + + if refClimatology is not None: + self.assertArrayApproxEqual(dsClimatology.mld.values, + refClimatology.mld.values) + + self.assertEqual(dsClimatology.attrs['totalMonths'], + expectedMonths) + self.assertApproxEqual(dsClimatology.attrs['totalDays'], + expectedDays) + dsClimatology.close() + + for index, suffix in enumerate(expectedSuffixes): + expectedClimatologyFileName = '{}/clim/mpas/mld_QU240_' \ + '{}_{}.nc'.format( + self.test_dir, monthNames, + suffix) + + dateModifiedCheck = os.path.getmtime( + expectedClimatologyFileName) + + # Check whether the given file was modified, and whether + # this was the expected result + fileWasModified = datesModfied[index] != dateModifiedCheck + assert fileWasModified == expectedModified[index] + + # remove the cache file for the next try + os.remove(expectedClimatologyFileName) + + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_climatology/mpasMesh.nc b/mpas_analysis/test/test_climatology/mpasMesh.nc new file mode 120000 index 000000000..880a52c2e --- /dev/null +++ b/mpas_analysis/test/test_climatology/mpasMesh.nc @@ -0,0 +1 @@ +../test_interpolate/mpasMesh.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_climatology/obsGrid.nc b/mpas_analysis/test/test_climatology/obsGrid.nc new file mode 100644 index 000000000..e595400d1 Binary files /dev/null and b/mpas_analysis/test/test_climatology/obsGrid.nc differ diff --git a/mpas_analysis/test/test_climatology/refMonthlyClim.nc 
b/mpas_analysis/test/test_climatology/refMonthlyClim.nc new file mode 100644 index 000000000..6f0b8e589 Binary files /dev/null and b/mpas_analysis/test/test_climatology/refMonthlyClim.nc differ diff --git a/mpas_analysis/test/test_climatology/refSeasonalClim.nc b/mpas_analysis/test/test_climatology/refSeasonalClim.nc new file mode 100644 index 000000000..cdf54bac9 Binary files /dev/null and b/mpas_analysis/test/test_climatology/refSeasonalClim.nc differ diff --git a/mpas_analysis/test/test_climatology/timeSeries.0002-01-01.nc b/mpas_analysis/test/test_climatology/timeSeries.0002-01-01.nc new file mode 120000 index 000000000..6c56dfeb4 --- /dev/null +++ b/mpas_analysis/test/test_climatology/timeSeries.0002-01-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-01-01.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_climatology/timeSeries.0002-02-01.nc b/mpas_analysis/test/test_climatology/timeSeries.0002-02-01.nc new file mode 120000 index 000000000..7d127a423 --- /dev/null +++ b/mpas_analysis/test/test_climatology/timeSeries.0002-02-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-02-01.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_climatology/timeSeries.0002-03-01.nc b/mpas_analysis/test/test_climatology/timeSeries.0002-03-01.nc new file mode 120000 index 000000000..a7e210ef8 --- /dev/null +++ b/mpas_analysis/test/test_climatology/timeSeries.0002-03-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-03-01.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_date.py b/mpas_analysis/test/test_date.py deleted file mode 100644 index 4ef4d9002..000000000 --- a/mpas_analysis/test/test_date.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -Unit test infrastructure for the Date class - -Xylar Asay-Davis -11/02/2016 -""" - -import pytest -import datetime -from mpas_analysis.test import TestCase, loaddatadir -from mpas_analysis.shared.timekeeping.Date import Date - -@pytest.mark.usefixtures("loaddatadir") -class TestDate(TestCase): - def test_date(self): - - # test each possible format: - # YYYY-MM-DD_hh:mm:ss - # YYYY-MM-DD_hh.mm.ss - # YYYY-MM-DD_SSSSS - # DDD_hh:mm:ss - # DDD_hh.mm.ss - # DDD_SSSSS - # hh.mm.ss - # hh:mm:ss - # YYYY-MM-DD - # SSSSS - - # test with isInterval == False - # YYYY-MM-DD_hh:mm:ss - date1 = Date(dateString='0001-01-01_00:00:00', isInterval=False) - date2 = Date(years=1, months=0, days=0, hours=0, minutes=0, seconds=0, - isInterval=False) - self.assertEqual(date1, date2) - - # test with isInterval == True - # YYYY-MM-DD_hh:mm:ss - date1 = Date(dateString='0001-00-00_00:00:00', isInterval=True) - date2 = Date(years=1, months=0, days=0, hours=0, minutes=0, seconds=0, - isInterval=True) - self.assertEqual(date1, date2) - - # YYYY-MM-DD_hh.mm.ss - date1 = Date(dateString='0001-01-02_00.01.00', isInterval=False) - date2 = Date(years=1, months=0, days=1, hours=0, minutes=1, seconds=0, - isInterval=False) - self.assertEqual(date1, date2) - - # YYYY-MM-DD_SSSSS - date1 = Date(dateString='0001-01-01_00002', isInterval=False) - date2 = Date(years=1, months=0, days=0, hours=0, minutes=0, seconds=2, - isInterval=False) - self.assertEqual(date1, date2) - - # DDD_hh:mm:ss - date1 = Date(dateString='0001_00:00:01', isInterval=True) - date2 = Date(years=0, months=0, days=1, hours=0, minutes=0, seconds=1, - isInterval=True) - self.assertEqual(date1, date2) - - # DDD_hh.mm.ss - date1 = Date(dateString='0002_01.00.01', isInterval=True) - date2 = Date(years=0, months=0, days=2, hours=1, minutes=0, seconds=1, - 
isInterval=True) - self.assertEqual(date1, date2) - - # DDD_SSSSS - date1 = Date(dateString='0002_00003', isInterval=True) - date2 = Date(years=0, months=0, days=2, hours=0, minutes=0, seconds=3, - isInterval=True) - self.assertEqual(date1, date2) - - # hh:mm:ss - date1 = Date(dateString='00:00:01', isInterval=False) - date2 = Date(years=0, months=0, days=0, hours=0, minutes=0, seconds=1, - isInterval=False) - self.assertEqual(date1, date2) - - # hh.mm.ss - date1 = Date(dateString='00.00.01', isInterval=True) - date2 = Date(years=0, months=0, days=0, hours=0, minutes=0, seconds=1, - isInterval=True) - self.assertEqual(date1, date2) - - # YYYY-MM-DD - date1 = Date(dateString='0001-01-01', isInterval=False) - date2 = Date(years=1, months=0, days=0, hours=0, minutes=0, seconds=0, - isInterval=False) - self.assertEqual(date1, date2) - - # SSSSS - date1 = Date(dateString='00005', isInterval=True) - date2 = Date(years=0, months=0, days=0, hours=0, minutes=0, seconds=5, - isInterval=True) - self.assertEqual(date1, date2) - - - # test operators - date1 = Date(dateString='1992-02-01', isInterval=False) - date2 = Date(dateString='1991-03-01', isInterval=False) - diff = date1-date2 - self.assertEqual(diff, Date(dateString='0000-11-00', isInterval=True)) - self.assertEqual(date1 < date2, False) - self.assertEqual(date2 < date1, True) - self.assertEqual(date1 < date1, False) - - date1 = Date(dateString='1996-01-15', isInterval=False) - date2 = Date(dateString='0005-00-00', isInterval=True) - diff = date1-date2 - self.assertEqual(diff, Date(dateString='1991-01-15', isInterval=False)) - - date1 = Date(dateString='1996-01-15', isInterval=False) - date2 = Date(dateString='0000-02-00', isInterval=True) - diff = date1-date2 - self.assertEqual(diff, Date(dateString='1995-11-15', isInterval=False)) - - date1 = Date(dateString='1996-01-15', isInterval=False) - date2 = Date(dateString='0000-00-20', isInterval=True) - diff = date1-date2 - self.assertEqual(diff, Date(dateString='1995-12-26', isInterval=False)) - - date = Date(dateString='1996-01-15', isInterval=False) - datetime1 = date.to_datetime(yearOffset=0) - datetime2 = datetime.datetime(year=1996, month=1, day=15) - self.assertEqual(datetime1, datetime2) - - date = Date(dateString='0000-00-20', isInterval=True) - timedelta1 = date.to_timedelta() - timedelta2 = datetime.timedelta(days=20) - self.assertEqual(timedelta1, timedelta2) - - # since pandas and xarray use the numpy type 'datetime[ns]`, which - # has a limited range of dates, the date 0001-01-01 gets increased to - # the minimum allowed year boundary, 1678-01-01 to avoid invalid - # dates. - date = Date(dateString='0001-01-01', isInterval=False) - datetime1 = date.to_datetime(yearOffset=0) - datetime2 = datetime.datetime(year=1678, month=1, day=1) - self.assertEqual(datetime1, datetime2) - - date = Date(dateString='0001-01-01', isInterval=False) - datetime1 = date.to_datetime(yearOffset=1849) - datetime2 = datetime.datetime(year=1850, month=1, day=1) - self.assertEqual(datetime1, datetime2) - - # since pandas and xarray use the numpy type 'datetime[ns]`, which - # has a limited range of dates, the date 9999-01-01 gets decreased to - # the maximum allowed year boundary, 2262-01-01 to avoid invalid - # dates. 
- date = Date(dateString='9999-01-01', isInterval=False) - datetime1 = date.to_datetime(yearOffset=0) - datetime2 = datetime.datetime(year=2262, month=1, day=1) - self.assertEqual(datetime1, datetime2) - -# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_generalized_reader.py b/mpas_analysis/test/test_generalized_reader.py new file mode 100644 index 000000000..4ced25802 --- /dev/null +++ b/mpas_analysis/test/test_generalized_reader.py @@ -0,0 +1,156 @@ +""" +Unit test infrastructure for the generalized_reader. + +Xylar Asay-Davis +02/16/2017 +""" + +import pytest +from mpas_analysis.test import TestCase, loaddatadir +from mpas_analysis.shared.generalized_reader.generalized_reader \ + import open_multifile_dataset +from mpas_analysis.configuration.MpasAnalysisConfigParser \ + import MpasAnalysisConfigParser + + +@pytest.mark.usefixtures("loaddatadir") +class TestGeneralizedReader(TestCase): + + def setup_config(self, autocloseFileLimitFraction=0.5, + maxChunkSize=10000): + config = MpasAnalysisConfigParser() + config.add_section('input') + config.set('input', 'autocloseFileLimitFraction', + str(autocloseFileLimitFraction)) + config.set('input', 'maxChunkSize', str(maxChunkSize)) + return config + + def test_variableMap(self): + fileName = str(self.datadir.join('example_jan.nc')) + simulationStartTime = '0001-01-01' + variableMap = { + 'avgSurfaceTemperature': + ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature', + 'other_string', + 'yet_another_string'], + 'daysSinceStartOfSim': + ['time_avg_daysSinceStartOfSim', + 'xtime', + 'something_else'], + 'avgLayerTemperature': + ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature', + 'test1', + 'test2'], + 'Time': [['xtime_start', 'xtime_end'], + 'time_avg_daysSinceStartOfSim']} + + variableList = ['avgSurfaceTemperature', 'avgLayerTemperature', + 'refBottomDepth', 'daysSinceStartOfSim'] + + config = self.setup_config() + for calendar in ['gregorian', 'gregorian_noleap']: + # preprocess_mpas will use variableMap to map the variable names + # from their values in the file to the desired values in + # variableList + ds = open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + config=config, + simulationStartTime=simulationStartTime, + timeVariableName='Time', + variableList=variableList, + variableMap=variableMap) + + # make sure the remapping happened as expected + self.assertEqual(sorted(ds.data_vars.keys()), sorted(variableList)) + + def test_open_dataset_fn(self): + fileName = str(self.datadir.join('example_jan.nc')) + timestr = ['xtime_start', 'xtime_end'] + variableList = \ + ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] + + config = self.setup_config() + for calendar in ['gregorian', 'gregorian_noleap']: + ds = open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + config=config, + timeVariableName=timestr, + variableList=variableList) + self.assertEqual(ds.data_vars.keys(), variableList) + + def test_start_end(self): + fileName = str(self.datadir.join('example_jan_feb.nc')) + timestr = ['xtime_start', 'xtime_end'] + variableList = \ + ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] + + config = self.setup_config() + for calendar in ['gregorian', 'gregorian_noleap']: + # all dates + ds = open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + config=config, + timeVariableName=timestr, + variableList=variableList, + startDate='0001-01-01', + endDate='9999-12-31') + self.assertEqual(len(ds.Time), 2) + + # just the 
first date + ds = open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + config=config, + timeVariableName=timestr, + variableList=variableList, + startDate='0005-01-01', + endDate='0005-02-01') + self.assertEqual(len(ds.Time), 1) + + # just the second date + ds = open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + config=config, + timeVariableName=timestr, + variableList=variableList, + startDate='0005-02-01', + endDate='0005-03-01') + self.assertEqual(len(ds.Time), 1) + + def test_open_process_climatology(self): + fileNames = [str(self.datadir.join('timeSeries.0002-{:02d}-01.nc'.format(month))) + for month in [1, 2, 3]] + calendar = 'gregorian_noleap' + variableMap = {'mld': ['timeMonthly_avg_tThreshMLD'], + 'Time': [['xtime_startMonthly', 'xtime_endMonthly']]} + annualClimatologies = [] + for frac, autoclose in zip([1.0, 0.], [False, True]): + # effectively, test with autoclose=False and autoclose=True + config = self.setup_config(autocloseFileLimitFraction=frac) + ds = open_multifile_dataset( + fileNames=fileNames, + calendar=calendar, + config=config, + timeVariableName='Time', + variableList=['mld'], + variableMap=variableMap) + + # note, the asserts for autoclose below are only guaranteed + # to work immediately following call to open_multifile_dataset + assert hasattr(ds, '_autoclose'), \ + '`autoclose` not defined for dataset' + if hasattr(ds, '_autoclose'): + assert ds._autoclose == int(autoclose), \ + ('`autoclose` used for dataset is inconsistent ' + 'with expected test value.') + + annualClimatologies.append(ds.mean(dim='Time')) + + self.assertArrayEqual(annualClimatologies[0].mld.values, + annualClimatologies[1].mld.values) + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_generalized_reader/example_jan.nc b/mpas_analysis/test/test_generalized_reader/example_jan.nc new file mode 120000 index 000000000..da89ad526 --- /dev/null +++ b/mpas_analysis/test/test_generalized_reader/example_jan.nc @@ -0,0 +1 @@ +../test_mpas_xarray/example_jan.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_generalized_reader/example_jan_feb.nc b/mpas_analysis/test/test_generalized_reader/example_jan_feb.nc new file mode 120000 index 000000000..67fdfa73b --- /dev/null +++ b/mpas_analysis/test/test_generalized_reader/example_jan_feb.nc @@ -0,0 +1 @@ +../test_mpas_xarray/example_jan_feb.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_generalized_reader/mpasMesh.nc b/mpas_analysis/test/test_generalized_reader/mpasMesh.nc new file mode 120000 index 000000000..880a52c2e --- /dev/null +++ b/mpas_analysis/test/test_generalized_reader/mpasMesh.nc @@ -0,0 +1 @@ +../test_interpolate/mpasMesh.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_generalized_reader/timeSeries.0002-01-01.nc b/mpas_analysis/test/test_generalized_reader/timeSeries.0002-01-01.nc new file mode 120000 index 000000000..6c56dfeb4 --- /dev/null +++ b/mpas_analysis/test/test_generalized_reader/timeSeries.0002-01-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-01-01.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_generalized_reader/timeSeries.0002-02-01.nc b/mpas_analysis/test/test_generalized_reader/timeSeries.0002-02-01.nc new file mode 120000 index 000000000..7d127a423 --- /dev/null +++ b/mpas_analysis/test/test_generalized_reader/timeSeries.0002-02-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-02-01.nc \ No newline at end of file diff --git 
a/mpas_analysis/test/test_generalized_reader/timeSeries.0002-03-01.nc b/mpas_analysis/test/test_generalized_reader/timeSeries.0002-03-01.nc new file mode 120000 index 000000000..a7e210ef8 --- /dev/null +++ b/mpas_analysis/test/test_generalized_reader/timeSeries.0002-03-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-03-01.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_interpolate.py b/mpas_analysis/test/test_interpolate.py index 4d437efc4..c7bd72299 100644 --- a/mpas_analysis/test/test_interpolate.py +++ b/mpas_analysis/test/test_interpolate.py @@ -1,134 +1,146 @@ """ -Tests for the interpolation module +Unit test infrastructure for horizontal interpolation. -author: Luke Van Roekel -date: 10-24-2016 +Xylar Asay-Davis +02/25/2017 """ -import numpy as np -from mpas_analysis.shared.interpolation.interpolate import interp_fields, init_tree, lon_lat_to_cartesian -from mpas_analysis.test import TestCase - +import pytest +import shutil +import os +import tempfile +import numpy + +from mpas_analysis.shared.interpolation import interpolate +from mpas_analysis.test import TestCase, loaddatadir +from mpas_analysis.configuration.MpasAnalysisConfigParser \ + import MpasAnalysisConfigParser + + +@pytest.mark.usefixtures("loaddatadir") class TestInterp(TestCase): - def test_lat_to_cartesian(self): + + def setUp(self): + # Create a temporary directory + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + # Remove the directory after the test + shutil.rmtree(self.test_dir) + + def test_destination_from_grid_file(self): """ - Test that input lat lon arrays are converted appropriately - - Author: Luke Van Roekel - date: 10-25-2016 - """ - - lat_input = np.deg2rad(np.array([-90, 90, 0, 0, 0, 0, 45, 45, 45, 45])) - lon_input = np.deg2rad(np.array([0, 0, 0, 90, 180, 270, 0, 90, 180, 270])) - - x_input = np.cos(lat_input) * np.cos(lon_input) - y_input = np.cos(lat_input) * np.sin(lon_input) - z_input = np.sin(lat_input) - - x, y, z = lon_lat_to_cartesian(lon_input, lat_input, R=1.0) - - self.assertArrayEqual(x, x_input) - self.assertArrayEqual(y, y_input) - self.assertArrayEqual(z, z_input) - - def test_target_grid_size(self): + test horizontal interpolation from an MPAS mesh to a destination + lat/lon grid determined from a file containing 'lat' and 'lon' coords + + Xylar Asay-Davis + 02/25/2017 """ - Test that target lat lon grid is produced with correct size - - Author: Luke Van Roekel - date: 10-25-2016 - """ - - lat_input = np.deg2rad(np.array([0, 0, 0, 0, 45, 45, 45, 45])) - lon_input = np.deg2rad(np.array([0, 90, 180, 270, 0, 90, 180, 270])) - - lon_min = -90 - lon_max = 90 - lat_min = -45 - lat_max = 45 - dLon = 30 - dLat = 30 - - d, inds, lonTarg, latTarg = init_tree(lon_input, lat_input, lon_min, lon_max, - lat_min, lat_max, dLon, dLat) - - nLonExpected = (lon_max - lon_min) / dLon - nLatExpected = (lat_max - lat_min) / dLat - - # LonTarg and LatTarg should be 2-D arrays - self.assertEqual(len(lonTarg.shape), 2) - self.assertEqual(len(latTarg.shape), 2) - - # LonTarg and latTarg should have expected sizes - self.assertEqual(lonTarg.shape[0], nLonExpected) - self.assertEqual(latTarg.shape[0], nLonExpected) - self.assertEqual(lonTarg.shape[1], nLatExpected) - self.assertEqual(latTarg.shape[1], nLatExpected) - - def test_target_grid_bounds(self): + + mpasMeshFileName = str(self.datadir.join('mpasMesh.nc')) + latLonGridFileName = str(self.datadir.join('SST_annual_1870-1900.nc')) + timeSeriesFileName = str(self.datadir.join('timeSeries.0002-01-01.nc')) + + suffix = 
'destination_from_grid_file' + weightFileName = '{}/weights_{}.nc'.format(self.test_dir, suffix) + outFileName = '{}/remapped_{}.nc'.format(self.test_dir, suffix) + + interpolate.build_remap_weights(sourceFileName=mpasMeshFileName, + outWeightFileName=weightFileName, + destintionFileName=latLonGridFileName, + destintionLatVarName='lat', + destintionLonVarName='lon', + sourceFileType='mpas', + method='bilinear') + + assert os.path.exists(weightFileName) + + interpolate.remap(inFileName=timeSeriesFileName, + outFileName=outFileName, + inWeightFileName=weightFileName, + sourceFileType='mpas') + + assert os.path.exists(outFileName) + + # TODO: check the results against a reference result + + def test_destination_from_numpy_lat_lon(self): + """ + test horizontal interpolation from an MPAS mesh to a destination + lat/lon grid determined from config options 'lat' and 'lon'. + + Xylar Asay-Davis + 02/25/2017 + """ + + configPath = str(self.datadir.join('config.analysis')) + config = MpasAnalysisConfigParser() + config.read(configPath) + + lat = numpy.array(config.getExpression('interpolate', 'lat', + usenumpyfunc=True)) + lon = numpy.array(config.getExpression('interpolate', 'lon', + usenumpyfunc=True)) + + mpasMeshFileName = str(self.datadir.join('mpasMesh.nc')) + timeSeriesFileName = str(self.datadir.join('timeSeries.0002-01-01.nc')) + + suffix = 'destination_from_config_options' + weightFileName = '{}/weights_{}.nc'.format(self.test_dir, suffix) + outFileName = '{}/remapped_{}.nc'.format(self.test_dir, suffix) + + interpolate.build_remap_weights(sourceFileName=mpasMeshFileName, + outWeightFileName=weightFileName, + sourceFileType='mpas', + method='bilinear', + destinationLat=lat, + destinationLon=lon) + + assert os.path.exists(weightFileName) + + interpolate.remap(inFileName=timeSeriesFileName, + outFileName=outFileName, + inWeightFileName=weightFileName, + sourceFileType='mpas') + + assert os.path.exists(outFileName) + + # TODO: check the results against a reference result + + def test_source_lat_lon(self): """ - Test that target lat lon grid is produced with correct bounds - - Author: Luke Van Roekel - date: 10-25-2016 - """ - - lat_input = np.deg2rad(np.array([0, 0, 0, 0, 45, 45, 45, 45])) - lon_input = np.deg2rad(np.array([0, 90, 180, 270, 0, 90, 180, 270])) - - lon_min = -180 - lon_max = 180 - lat_min = -50 - lat_max = 50 - dLon = 20 - dLat = 20 - - d, inds, lonTarg, latTarg = init_tree(lon_input, lat_input, lon_min, lon_max, - lat_min, lat_max, dLon, dLat) - - # LonTarg should respect bounds defined by user - self.assertLessThan(lonTarg.max(), lon_max) - self.assertGreaterThan(lonTarg.min(), lon_min) - - self.assertLessThan(latTarg.max(), lat_max) - self.assertGreaterThan(latTarg.min(), lat_min) - - def test_interp(self): + test horizontal interpolation from a lat/lon grid to a destination + lat/lon grid determined from config options 'lat' and 'lon'. 
+ + Xylar Asay-Davis + 02/25/2017 """ - Test that nearest neighbor interpolation works as expected - - Author: Luke Van Roekel - date: 10-25-2016 - """ - lat_input = np.deg2rad(np.array([-45, -45, 45, 45])) - lon_input = np.deg2rad(np.array([-90, 90, -90, 90])) - vals_input = np.array([1, 2, 3, 4]) - - lon_min = -90 - lon_max = 90 - lat_min = -45 - lat_max = 45 - dLon = 30 - dLat = 30 - - d, inds, lonTarg, latTarg = init_tree(lon_input, lat_input, lon_min, lon_max, - lat_min, lat_max, dLon, dLat) - - vals_output = interp_fields(vals_input, d, inds, lonTarg) - - #Test a few ramdom spots for nearest neighbor - #One exact spot - self.assertEqual(vals_output[0, 0], vals_input[0]) - - # Close point to val_input(0) - self.assertEqual(vals_output[2, 0], vals_input[0]) - - # Close to point val_input(1) - self.assertEqual(vals_output[4, 0], vals_input[1]) - - #Close to val_input(2) - self.assertEqual(vals_output[2, 2], vals_input[2]) - - #Close to val_input(3) - self.assertEqual(vals_output[4, 2], vals_input[3]) - \ No newline at end of file + + lat = numpy.linspace(-90., 90., 361) + lon = numpy.linspace(-180., 180., 721) + + sourceFileName = str(self.datadir.join('SST_annual_1870-1900.nc')) + + suffix = 'source_lat_lon' + weightFileName = '{}/weights_{}.nc'.format(self.test_dir, suffix) + outFileName = '{}/remapped_{}.nc'.format(self.test_dir, suffix) + + interpolate.build_remap_weights(sourceFileName=sourceFileName, + outWeightFileName=weightFileName, + sourceFileType='latlon', + method='bilinear', + destinationLat=lat, + destinationLon=lon) + + assert os.path.exists(weightFileName) + + interpolate.remap(inFileName=sourceFileName, + outFileName=outFileName, + inWeightFileName=weightFileName, + sourceFileType='mpas') + + assert os.path.exists(outFileName) + + # TODO: check the results against a reference result + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_interpolate/SST_annual_1870-1900.nc b/mpas_analysis/test/test_interpolate/SST_annual_1870-1900.nc new file mode 100644 index 000000000..cc54b3504 Binary files /dev/null and b/mpas_analysis/test/test_interpolate/SST_annual_1870-1900.nc differ diff --git a/mpas_analysis/test/test_interpolate/config.analysis b/mpas_analysis/test/test_interpolate/config.analysis new file mode 100644 index 000000000..d692f07d9 --- /dev/null +++ b/mpas_analysis/test/test_interpolate/config.analysis @@ -0,0 +1,3 @@ +[interpolate] +lat = numpy.linspace(-90., 90., 361) +lon = numpy.linspace(-180., 180., 721) diff --git a/mpas_analysis/test/test_interpolate/mpasMesh.nc b/mpas_analysis/test/test_interpolate/mpasMesh.nc new file mode 100644 index 000000000..f1d0341c3 Binary files /dev/null and b/mpas_analysis/test/test_interpolate/mpasMesh.nc differ diff --git a/mpas_analysis/test/test_interpolate/timeSeries.0002-01-01.nc b/mpas_analysis/test/test_interpolate/timeSeries.0002-01-01.nc new file mode 100644 index 000000000..86da95247 Binary files /dev/null and b/mpas_analysis/test/test_interpolate/timeSeries.0002-01-01.nc differ diff --git a/mpas_analysis/test/test_interpolate/timeSeries.0002-02-01.nc b/mpas_analysis/test/test_interpolate/timeSeries.0002-02-01.nc new file mode 100644 index 000000000..6b2c2f3cc Binary files /dev/null and b/mpas_analysis/test/test_interpolate/timeSeries.0002-02-01.nc differ diff --git a/mpas_analysis/test/test_interpolate/timeSeries.0002-03-01.nc b/mpas_analysis/test/test_interpolate/timeSeries.0002-03-01.nc new file mode 100644 index 000000000..fedaf550c Binary files /dev/null and 
b/mpas_analysis/test/test_interpolate/timeSeries.0002-03-01.nc differ diff --git a/mpas_analysis/test/test_io_utility.py b/mpas_analysis/test/test_io_utility.py index 00b63410a..710388717 100644 --- a/mpas_analysis/test/test_io_utility.py +++ b/mpas_analysis/test/test_io_utility.py @@ -7,15 +7,17 @@ import os import pytest -from mpas_analysis.test import (TestCase, loaddatadir) +from mpas_analysis.test import TestCase, loaddatadir from mpas_analysis.shared.io import paths + @pytest.mark.usefixtures("loaddatadir") class TestPaths(TestCase): def test_paths(self): os.chdir(bytes(self.datadir)) self.assertEquals(paths('[0-9]*', '[a-z]*'), - ['0.txt', '1.txt', '2.txt', 'a.txt', 'b.txt', 'c.txt']) + ['0.txt', '1.txt', '2.txt', 'a.txt', 'b.txt', + 'c.txt']) # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_mpas_config_parser.py b/mpas_analysis/test/test_mpas_config_parser.py index 4cb09cb1c..e8efaef1d 100644 --- a/mpas_analysis/test/test_mpas_config_parser.py +++ b/mpas_analysis/test/test_mpas_config_parser.py @@ -1,14 +1,15 @@ """ Unit test infrastructure for MpasAnalysisConfigParser -Xylar Asay-Davis -12/05/2016 +Xylar Asay-Davis, Phillip J. Wolfram +01/31/2017 """ import pytest from mpas_analysis.test import TestCase, loaddatadir from mpas_analysis.configuration.MpasAnalysisConfigParser \ import MpasAnalysisConfigParser +from . import requires_numpy @pytest.mark.usefixtures("loaddatadir") @@ -30,8 +31,16 @@ def test_read_config(self): self.assertEqual(colorMapName, 'coolwarm') self.assertEqual(self.config.getint('Test', 'testInt'), 15) - self.assertEqual(self.config.getfloat('Test', 'testFloat'), 18.) + self.assertEqual(self.config.getExpression('Test', 'testInt'), 15) + + self.assertEqual(self.config.getfloat('Test', 'testFloat'), 18.0) + self.assertEqual(self.config.getExpression('Test', 'testFloat'), 18.0) + + self.assertEqual(self.config.getfloat('Test', 'testFloat2'), 3.) + self.assertEqual(self.config.getExpression('Test', 'testFloat2'), 3.) 
+ self.assertEqual(self.config.getboolean('Test', 'testBool'), True) + self.assertEqual(self.config.getExpression('Test', 'testBool'), True) testList = self.config.getExpression('sst_modelvsobs', 'cmapIndicesModelObs') @@ -64,5 +73,52 @@ def test_read_config(self): 'key2': -12, 'key3': False}) + testNone = self.config.getExpression('Test', 'doesntexist') + assert testNone is None + + @requires_numpy + def test_read_config_numpy(self): + self.setup_config() + + # tests numpy evaluation capability + import numpy as np + for testname in ['testNumpyarange' + str(ii) for ii in np.arange(3)]: + self.assertArrayEqual(self.config.getExpression('TestNumpy', + testname, + usenumpyfunc=True), + np.arange(0, 1, 10)) + for testname in ['testNumpylinspace' + str(ii) for ii in np.arange(3)]: + self.assertArrayEqual(self.config.getExpression('TestNumpy', + testname, + usenumpyfunc=True), + np.linspace(0, 1, 10)) + for testNumpy in ['testNumpypi' + str(ii) for ii in np.arange(3)] + \ + ['testNumpyPi']: + self.assertEqual(self.config.getExpression('TestNumpy', testNumpy, + usenumpyfunc=True), + np.pi) + with self.assertRaisesRegexp( + AssertionError, + "'__' is not allowed in .* for `usenumpyfunc=True`"): + self.config.getExpression('TestNumpy', 'testBadStr', + usenumpyfunc=True), + + def test_get_with_default(self): + self.setup_config() + + def check_get_with_default(name, value, dtype): + # test an options that doesn't exist using getWithDefault + var = self.config.getWithDefault('sst_modelvsobs', name, value) + assert isinstance(var, dtype) + self.assertEqual(var, value) + + # test several types with getWithDefault + check_get_with_default(name='aBool', value=True, dtype=bool) + check_get_with_default(name='anInt', value=1, dtype=(int, long)) + check_get_with_default(name='aFloat', value=1.0, dtype=float) + check_get_with_default(name='aList', value=[1, 2, 3], dtype=list) + check_get_with_default(name='aTuple', value=(1, 2, 3), dtype=tuple) + check_get_with_default(name='aDict', value={'blah': 1}, dtype=dict) + check_get_with_default(name='aStr', value='blah', dtype=str) # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_mpas_config_parser/config.analysis b/mpas_analysis/test/test_mpas_config_parser/config.analysis index 18071dae5..b6c999a33 100644 --- a/mpas_analysis/test/test_mpas_config_parser/config.analysis +++ b/mpas_analysis/test/test_mpas_config_parser/config.analysis @@ -21,6 +21,7 @@ comparisonTimes = ['JFM', 'JAS', 'ANN'] testInt = 15 testFloat = 18.0 +testFloat2 = 3. testBool = True @@ -32,3 +33,15 @@ testDict = {'key1': 'string', 'key2': -12, 'key3': False} +[TestNumpy] +testNumpyarange0 = arange(0, 1, 10) +testNumpyarange1 = np.arange(0, 1, 10) +testNumpyarange2 = numpy.arange(0, 1, 10) +testNumpylinspace0 = linspace(0, 1, 10) +testNumpylinspace1 = np.linspace(0, 1, 10) +testNumpylinspace2 = numpy.linspace(0, 1, 10) +testNumpypi0 = pi +testNumpypi1 = np.pi +testNumpypi2 = numpy.pi +testNumpyPi = Pi +testBadStr = __bad_string__ diff --git a/mpas_analysis/test/test_mpas_xarray.py b/mpas_analysis/test/test_mpas_xarray.py index cb65227fb..5abab4d8e 100644 --- a/mpas_analysis/test/test_mpas_xarray.py +++ b/mpas_analysis/test/test_mpas_xarray.py @@ -2,192 +2,173 @@ Unit test infrastructure for mpas_xarray. Xylar Asay-Davis, Phillip J. 
Wolfram -12/07/2016 +02/15/2017 """ import pytest from mpas_analysis.test import TestCase, loaddatadir from mpas_analysis.shared.mpas_xarray import mpas_xarray -import xarray as xr -import pandas as pd +from mpas_analysis.shared.timekeeping.utility import days_to_datetime, \ + string_to_datetime @pytest.mark.usefixtures("loaddatadir") -class TestNamelist(TestCase): +class TestMpasXarray(TestCase): def test_subset_variables(self): fileName = str(self.datadir.join('example_jan.nc')) + calendar = 'gregorian_noleap' timestr = ['xtime_start', 'xtime_end'] - varList = ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] + variableList = \ + ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] # first, test loading the whole data set and then calling # subset_variables explicitly - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas(x, - timestr=timestr, - yearoffset=1850)) - ds = mpas_xarray.subset_variables(ds, varList) - self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList)) - self.assertEqual(pd.Timestamp(ds.Time.values[0]), - pd.Timestamp('1855-01-16 12:22:30')) - + ds = mpas_xarray.open_multifile_dataset(fileNames=fileName, + calendar=calendar, + timeVariableName=timestr) + ds = mpas_xarray.subset_variables(ds, variableList) + self.assertEqual(sorted(ds.data_vars.keys()), sorted(variableList)) + self.assertEqual(days_to_datetime(days=ds.Time.values, + referenceDate='0001-01-01', + calendar=calendar), + string_to_datetime('0005-01-16 12:22:30')) # next, test the same with the onlyvars argument - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas(x, - timestr=timestr, - onlyvars=varList, - yearoffset=1850)) - self.assertEqual(ds.data_vars.keys(), varList) - - with self.assertRaisesRegexp(AssertionError, 'Empty dataset is returned.'): + ds = mpas_xarray.open_multifile_dataset(fileNames=fileName, + calendar=calendar, + timeVariableName=timestr, + variableList=variableList) + self.assertEqual(ds.data_vars.keys(), variableList) + + with self.assertRaisesRegexp(ValueError, + 'Empty dataset is returned.'): missingvars = ['foo', 'bar'] - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas(x, - timestr=timestr, - onlyvars=missingvars, - yearoffset=1850)) + ds = mpas_xarray.open_multifile_dataset(fileNames=fileName, + calendar=calendar, + timeVariableName=timestr, + variableList=missingvars) def test_iselvals(self): fileName = str(self.datadir.join('example_jan.nc')) + calendar = 'gregorian_noleap' + simulationStartTime = '0001-01-01' timestr = 'time_avg_daysSinceStartOfSim' - varList = \ + variableList = \ ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature', 'refBottomDepth'] iselvals = {'nVertLevels': slice(0, 3)} - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas(x, - timestr=timestr, - onlyvars=varList, - iselvals=iselvals, - yearoffset=1850)) - self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList)) - self.assertEqual(ds[varList[0]].shape, (1, 7, 3)) + ds = mpas_xarray.open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + simulationStartTime=simulationStartTime, + timeVariableName=timestr, + variableList=variableList, + iselValues=iselvals) + + self.assertEqual(sorted(ds.data_vars.keys()), sorted(variableList)) + self.assertEqual(ds[variableList[0]].shape, (1, 7, 3)) self.assertEqual(ds['refBottomDepth'].shape, (3,)) self.assertApproxEqual(ds['refBottomDepth'][-1], 4.882000207901) - date = 
pd.Timestamp(ds.Time.values[0]) - # round to nearest second - date = pd.Timestamp(long(round(date.value, -9))) - self.assertEqual(date, pd.Timestamp('1855-01-13 12:24:14')) + + self.assertEqual(days_to_datetime(days=ds.Time.values[0], + referenceDate='0001-01-01', + calendar=calendar), + string_to_datetime('0005-01-14 12:24:14')) def test_no_units(self): fileName = str(self.datadir.join('example_no_units_jan.nc')) + calendar = 'gregorian_noleap' + simulationStartTime = '0001-01-01' timestr = 'time_avg_daysSinceStartOfSim' - varList = \ + variableList = \ ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature', 'refBottomDepth'] - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas(x, - timestr=timestr, - onlyvars=varList, - yearoffset=1850)) - self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList)) - date = pd.Timestamp(ds.Time.values[0]) - # round to nearest second - date = pd.Timestamp(long(round(date.value, -9))) - self.assertEqual(date, pd.Timestamp('1855-01-13 12:24:14')) + ds = mpas_xarray.open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + simulationStartTime=simulationStartTime, + timeVariableName=timestr, + variableList=variableList) + self.assertEqual(sorted(ds.data_vars.keys()), sorted(variableList)) + self.assertEqual(days_to_datetime(days=ds.Time.values[0], + referenceDate='0001-01-01', + calendar=calendar), + string_to_datetime('0005-01-14 12:24:14')) def test_bad_selvals(self): fileName = str(self.datadir.join('example_jan.nc')) + calendar = 'gregorian_noleap' + simulationStartTime = '0001-01-01' timestr = 'time_avg_daysSinceStartOfSim' - varList = \ + variableList = \ ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature', 'refBottomDepth'] selvals = {'refBottomDepth': 8.77999997138977} with self.assertRaisesRegexp(AssertionError, - 'not a dimension in the dataset that ' - 'can be used for selection'): - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas(x, - timestr=timestr, - onlyvars=varList, - selvals=selvals, - yearoffset=1850)) - + 'not a dimension in the dataset that ' + 'can be used for selection'): + mpas_xarray.open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + simulationStartTime=simulationStartTime, + timeVariableName=timestr, + variableList=variableList, + selValues=selvals) def test_selvals(self): fileName = str(self.datadir.join('example_jan.nc')) + calendar = 'gregorian_noleap' + simulationStartTime = '0001-01-01' timestr = 'time_avg_daysSinceStartOfSim' - varList = \ + variableList = \ ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature', 'refBottomDepth'] - selvals = {'nVertLevels': 0} - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas(x, - timestr=timestr, - onlyvars=varList, - selvals=selvals, - yearoffset=1850)) - self.assertEqual(ds.data_vars.keys(), varList) - self.assertEqual(ds[varList[0]].shape, (1, 7)) - self.assertEqual(ds['nVertLevels'].shape, ()) - self.assertApproxEqual(ds['nVertLevels'], - selvals['nVertLevels']) - + dsRef = mpas_xarray.open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + simulationStartTime=simulationStartTime, + timeVariableName=timestr, + variableList=variableList, + selValues=None) + + for vertIndex in range(0, 11): + selvals = {'nVertLevels': vertIndex} + ds = mpas_xarray.open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + simulationStartTime=simulationStartTime, + timeVariableName=timestr, + 
variableList=variableList, + selValues=selvals) + + self.assertEqual(ds.data_vars.keys(), variableList) + self.assertEqual(ds[variableList[0]].shape, (1, 7)) + self.assertEqual(ds['refBottomDepth'], + dsRef['refBottomDepth'][vertIndex]) def test_remove_repeated_time_index(self): fileName = str(self.datadir.join('example_jan*.nc')) + calendar = 'gregorian_noleap' timestr = ['xtime_start', 'xtime_end'] - varList = ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] - - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas(x, - timestr=timestr, - onlyvars=varList, - yearoffset=1850)) - - self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList)) - self.assertEqual(len(ds.Time.values), 3) - - ds = mpas_xarray.remove_repeated_time_index(ds) + variableList = \ + ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] + + # repeat time indices are removed in openMultifileDataSet + ds = mpas_xarray.open_multifile_dataset( + fileNames=fileName, + calendar=calendar, + timeVariableName=timestr, + variableList=variableList) + + self.assertEqual(sorted(ds.data_vars.keys()), sorted(variableList)) + # There would be 3 time indices if repeat indices had not been removed. + # Make sure there are 2. self.assertEqual(len(ds.Time.values), 2) - def test_variable_map(self): - fileName = str(self.datadir.join('example_jan.nc')) - varMap = { - 'avgSurfaceTemperature': - ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature', - 'other_string', - 'yet_another_string'], - 'daysSinceStartOfSim': - ['time_avg_daysSinceStartOfSim', - 'xtime', - 'something_else'], - 'avgLayerTemperature': - ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature', - 'test1', - 'test2'], - 'Time': [['xtime_start', 'xtime_end'], - 'time_avg_daysSinceStartOfSim']} - - varList = ['avgSurfaceTemperature', 'avgLayerTemperature', - 'refBottomDepth', 'daysSinceStartOfSim'] - - # preprocess_mpas will use varMap to map the variable names from their - # values in the file to the desired values in varList - ds = xr.open_mfdataset( - fileName, - preprocess=lambda x: mpas_xarray.preprocess_mpas( - x, - timestr='Time', - onlyvars=varList, - yearoffset=1850, - varmap=varMap)) - - # make sure the remapping happened as expected - self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList)) - # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_namelist_streams_interface.py b/mpas_analysis/test/test_namelist_streams_interface.py index 400279656..0e29c9d2e 100644 --- a/mpas_analysis/test/test_namelist_streams_interface.py +++ b/mpas_analysis/test/test_namelist_streams_interface.py @@ -10,6 +10,7 @@ from mpas_analysis.test import TestCase, loaddatadir from mpas_analysis.shared.io import NameList, StreamsFile + @pytest.mark.usefixtures("loaddatadir") class TestNamelist(TestCase): def setup_namelist(self): @@ -50,41 +51,45 @@ def test_read_streamsfile(self): files = self.sf.readpath('output') expectedFiles = [] - for date in ['0001-01-01','0001-01-02','0001-02-01','0002-01-01']: + for date in ['0001-01-01', '0001-01-02', '0001-02-01', '0002-01-01']: expectedFiles.append('{}/output/output.{}_00.00.00.nc' .format(self.sf.streamsdir, date)) self.assertEqual(files, expectedFiles) files = self.sf.readpath('output', - startDate='0001-01-03', - endDate='0001-12-30') + startDate='0001-01-03', + endDate='0001-12-30', + calendar='gregorian_noleap') expectedFiles = [] - for date in ['0001-01-02','0001-02-01']: + for date in ['0001-01-02', '0001-02-01']: 
expectedFiles.append('{}/output/output.{}_00.00.00.nc' .format(self.sf.streamsdir, date)) self.assertEqual(files, expectedFiles) files = self.sf.readpath('output', - startDate='0001-01-03') + startDate='0001-01-03', + calendar='gregorian_noleap') expectedFiles = [] - for date in ['0001-01-02','0001-02-01','0002-01-01']: + for date in ['0001-01-02', '0001-02-01', '0002-01-01']: expectedFiles.append('{}/output/output.{}_00.00.00.nc' .format(self.sf.streamsdir, date)) self.assertEqual(files, expectedFiles) files = self.sf.readpath('output', - endDate='0001-12-30') + endDate='0001-12-30', + calendar='gregorian_noleap') expectedFiles = [] - for date in ['0001-01-01','0001-01-02','0001-02-01']: + for date in ['0001-01-01', '0001-01-02', '0001-02-01']: expectedFiles.append('{}/output/output.{}_00.00.00.nc' .format(self.sf.streamsdir, date)) self.assertEqual(files, expectedFiles) files = self.sf.readpath('restart', - startDate='0001-01-01', - endDate='0001-12-31') + startDate='0001-01-01', + endDate='0001-12-31', + calendar='gregorian_noleap') expectedFiles = [] - for seconds in ['00010','00020']: + for seconds in ['00010', '00020']: expectedFiles.append('{}/restarts/restart.0001-01-01_{}.nc' .format(self.sf.streamsdir, seconds)) self.assertEqual(files, expectedFiles) @@ -93,10 +98,10 @@ def test_read_streamsfile(self): expectedFiles = ['{}/mesh.nc'.format(self.sf.streamsdir)] self.assertEqual(files, expectedFiles) - files = self.sf.readpath('mesh', - startDate='0001-01-01', - endDate='0001-12-31') + startDate='0001-01-01', + endDate='0001-12-31', + calendar='gregorian_noleap') expectedFiles = ['{}/mesh.nc'.format(self.sf.streamsdir)] self.assertEqual(files, expectedFiles) diff --git a/mpas_analysis/test/test_run_analysis_utils.py b/mpas_analysis/test/test_run_analysis_utils.py new file mode 100644 index 000000000..abec13c0e --- /dev/null +++ b/mpas_analysis/test/test_run_analysis_utils.py @@ -0,0 +1,143 @@ +""" +Unit tests for utility functions in run_analysis + +Xylar Asay-Davis +02/03/2017 +""" + +import pytest +from mpas_analysis.test import TestCase +from run_analysis import checkGenerate +from mpas_analysis.configuration.MpasAnalysisConfigParser \ + import MpasAnalysisConfigParser + + +class TestRunAnalysisUtils(TestCase): + + def test_checkGenerate(self): + + def doTest(generate, expectedResults): + config = MpasAnalysisConfigParser() + config.add_section('output') + config.set('output', 'generate', generate) + for analysisName in expectedResults: + expectedResult = expectedResults[analysisName] + result = checkGenerate( + config, analysisName=analysisName, + mpasCore=cores[analysisName], + analysisCategory=categories[analysisName]) + self.assertEqual(result, expectedResult) + + # Comments from config.template about how generate works: + # + # a list of analyses to generate. Valid names are: + # 'timeSeriesOHC', 'timeSeriesSST', 'regriddedSST', + # 'regriddedSSS', 'regriddedMLD', 'timeSeriesSeaIceAreaVol', + # 'regriddedSeaIceConcThick' + # the following shortcuts exist: + # 'all' -- all analyses will be run + # 'all_timeSeries' -- all time-series analyses will be run + # 'all_regriddedHorizontal' -- all analyses involving regridded + # horizontal fields will be run + # 'all_ocean' -- all ocean analyses will be run + # 'all_seaIce' -- all sea-ice analyses will be run + # 'no_timeSeriesOHC' -- skip 'timeSeriesOHC' (and similarly with the + # other analyses). + # 'no_ocean', 'no_timeSeries', etc. 
-- in analogy to 'all_*', skip + # the given category of analysis + # an equivalent syntax can be used on the command line to override this + # option: + # ./run_analysis.py config.analysis --generate \ + # all,no_ocean,all_timeSeries + + cores = {'timeSeriesOHC': 'ocean', + 'timeSeriesSST': 'ocean', + 'timeSeriesNino34': 'ocean', + 'timeSeriesMHT': 'ocean', + 'timeSeriesMOC': 'ocean', + 'regriddedSST': 'ocean', + 'regriddedMLD': 'ocean', + 'regriddedSSS': 'ocean', + 'timeSeriesSeaIceAreaVol': 'seaIce', + 'regriddedSeaIceConcThick': 'seaIce'} + + categories = {'timeSeriesOHC': 'timeSeries', + 'timeSeriesSST': 'timeSeries', + 'timeSeriesNino34': 'timeSeries', + 'timeSeriesMHT': 'timeSeries', + 'timeSeriesMOC': 'timeSeries', + 'regriddedSST': 'regriddedHorizontal', + 'regriddedMLD': 'regriddedHorizontal', + 'regriddedSSS': 'regriddedHorizontal', + 'timeSeriesSeaIceAreaVol': 'timeSeries', + 'regriddedSeaIceConcThick': 'regriddedHorizontal'} + + # test 'all' + expectedResults = {} + for analysisName in cores: + expectedResults[analysisName] = True + doTest("['all']", expectedResults) + + # test 'all_' and ['all', 'no_'] + for category in set(categories.values()): + expectedResults = {} + for analysisName in categories: + expectedResults[analysisName] = \ + (categories[analysisName] == category) + doTest("['all_{}']".format(category), expectedResults) + + expectedResults = {} + for analysisName in categories: + expectedResults[analysisName] = \ + (categories[analysisName] != category) + doTest("['all', 'no_{}']".format(category), expectedResults) + + # test 'all_' and ['all', 'no_'] + for core in set(cores.values()): + expectedResults = {} + for analysisName in cores: + expectedResults[analysisName] = \ + (cores[analysisName] == core) + doTest("['all_{}']".format(core), expectedResults) + + expectedResults = {} + for analysisName in cores: + expectedResults[analysisName] = \ + (cores[analysisName] != core) + doTest("['all','no_{}']".format(core), expectedResults) + + # test each analysis individually + for analysisName in cores: + expectedResults = {} + for otherAnalysis in cores: + expectedResults[otherAnalysis] = \ + (analysisName == otherAnalysis) + doTest("['{}']".format(analysisName), expectedResults) + + # test a non-existent analysis + expectedResults = {} + for analysisName in cores: + expectedResults[analysisName] = False + doTest("['fakeAnalysis']", expectedResults) + + # test ['all', 'no_ocean', 'all_timeSeries'] + expectedResults = {} + for analysisName in cores: + expectedResults[analysisName] = True + for analysisName in cores: + if cores[analysisName] == 'ocean': + expectedResults[analysisName] = False + for analysisName in categories: + if categories[analysisName] == 'timeSeries': + expectedResults[analysisName] = True + doTest("['all', 'no_ocean', 'all_timeSeries']", expectedResults) + + # test ['all', 'no_timeSeriesOHC'] + expectedResults = {} + for analysisName in cores: + expectedResults[analysisName] = True + expectedResults['timeSeriesOHC'] = False + doTest("['all', 'no_timeSeriesOHC']", expectedResults) + + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_timekeeping.py b/mpas_analysis/test/test_timekeeping.py new file mode 100644 index 000000000..27161ffe8 --- /dev/null +++ b/mpas_analysis/test/test_timekeeping.py @@ -0,0 +1,292 @@ +""" +Unit test infrastructure for the Date class + +Author +------ +Xylar Asay-Davis + +Last Modified +------------- +02/17/2017 +""" + +import pytest +import datetime +from 
mpas_analysis.shared.timekeeping.MpasRelativeDelta \ + import MpasRelativeDelta +from mpas_analysis.test import TestCase +from mpas_analysis.shared.timekeeping.utility import string_to_datetime, \ + string_to_relative_delta, string_to_days_since_date, days_to_datetime, \ + datetime_to_days, date_to_days + + +class TestTimekeeping(TestCase): + def test_timekeeping(self): + + # test each possible format: + # YYYY-MM-DD_hh:mm:ss + # YYYY-MM-DD_hh.mm.ss + # YYYY-MM-DD_SSSSS + # DDD_hh:mm:ss + # DDD_hh.mm.ss + # DDD_SSSSS + # hh.mm.ss + # hh:mm:ss + # YYYY-MM-DD + # SSSSS + + for calendar in ['gregorian', 'gregorian_noleap']: + # test datetime.datetime + # YYYY-MM-DD_hh:mm:ss + date1 = string_to_datetime('0001-01-01_00:00:00') + date2 = datetime.datetime(year=1, month=1, day=1, hour=0, minute=0, + second=0) + self.assertEqual(date1, date2) + + delta1 = string_to_relative_delta('0001-00-00_00:00:00', + calendar=calendar) + delta2 = MpasRelativeDelta(years=1, months=0, days=0, hours=0, + minutes=0, seconds=0, calendar=calendar) + self.assertEqual(delta1, delta2) + + # YYYY-MM-DD_hh.mm.ss + date1 = string_to_datetime('0001-01-01_00.00.00') + date2 = datetime.datetime(year=1, month=1, day=1, hour=0, minute=0, + second=0) + self.assertEqual(date1, date2) + + # YYYY-MM-DD_SSSSS + date1 = string_to_datetime('0001-01-01_00002') + date2 = datetime.datetime(year=1, month=1, day=1, hour=0, minute=0, + second=2) + self.assertEqual(date1, date2) + + # DDD_hh:mm:ss + delta1 = string_to_relative_delta('0001_00:00:01', + calendar=calendar) + delta2 = MpasRelativeDelta(years=0, months=0, days=1, hours=0, + minutes=0, seconds=1, calendar=calendar) + self.assertEqual(delta1, delta2) + + # DDD_hh.mm.ss + delta1 = string_to_relative_delta('0002_01.00.01', + calendar=calendar) + delta2 = MpasRelativeDelta(years=0, months=0, days=2, hours=1, + minutes=0, seconds=1, calendar=calendar) + self.assertEqual(delta1, delta2) + + # DDD_SSSSS + delta1 = string_to_relative_delta('0002_00003', + calendar=calendar) + delta2 = MpasRelativeDelta(years=0, months=0, days=2, hours=0, + minutes=0, seconds=3, calendar=calendar) + self.assertEqual(delta1, delta2) + + # hh:mm:ss + date1 = string_to_datetime('00:00:01') + date2 = datetime.datetime(year=1, month=1, day=1, hour=0, minute=0, + second=1) + self.assertEqual(date1, date2) + + # hh.mm.ss + delta1 = string_to_relative_delta('00.00.01', + calendar=calendar) + delta2 = MpasRelativeDelta(years=0, months=0, days=0, hours=0, + minutes=0, seconds=1, calendar=calendar) + self.assertEqual(delta1, delta2) + + # YYYY-MM-DD + date1 = string_to_datetime('0001-01-01') + date2 = datetime.datetime(year=1, month=1, day=1, hour=0, minute=0, + second=0) + self.assertEqual(date1, date2) + + # SSSSS + delta1 = string_to_relative_delta('00005', + calendar=calendar) + delta2 = MpasRelativeDelta(years=0, months=0, days=0, hours=0, + minutes=0, seconds=5, calendar=calendar) + self.assertEqual(delta1, delta2) + + date1 = string_to_datetime('1996-01-15') + delta = string_to_relative_delta('0005-00-00', + calendar=calendar) + date2 = date1-delta + self.assertEqual(date2, string_to_datetime('1991-01-15')) + + date1 = string_to_datetime('1996-01-15') + delta = string_to_relative_delta('0000-02-00', + calendar=calendar) + date2 = date1-delta + self.assertEqual(date2, string_to_datetime('1995-11-15')) + + date1 = string_to_datetime('1996-01-15') + delta = string_to_relative_delta('0000-00-20', + calendar=calendar) + date2 = date1-delta + self.assertEqual(date2, string_to_datetime('1995-12-26')) + + def 
test_MpasRelativeDeltaOps(self): + # test if the calendars behave as they should close to leap day + # also, test addition and subtraction of the form + # datetime.datetime +/- MpasRelativeDelta above + # both calendars with adding one day + for calendar, expected in zip(['gregorian', 'gregorian_noleap'], + ['2016-02-29', '2016-03-01']): + self.assertEqual(string_to_datetime('2016-02-28') + + string_to_relative_delta('0000-00-01', + calendar=calendar), + string_to_datetime(expected)) + + # both calendars with subtracting one day + for calendar, expected in zip(['gregorian', 'gregorian_noleap'], + ['2016-02-29', '2016-02-28']): + self.assertEqual(string_to_datetime('2016-03-01') - + string_to_relative_delta('0000-00-01', + calendar=calendar), + string_to_datetime(expected)) + + # both calendars with adding one month + for calendar, expected in zip(['gregorian', 'gregorian_noleap'], + ['2016-02-29', '2016-02-28']): + self.assertEqual(string_to_datetime('2016-01-31') + + string_to_relative_delta('0000-01-00', + calendar=calendar), + string_to_datetime(expected)) + + # both calendars with subtracting one month + for calendar, expected in zip(['gregorian', 'gregorian_noleap'], + ['2016-02-29', '2016-02-28']): + self.assertEqual(string_to_datetime('2016-03-31') - + string_to_relative_delta('0000-01-00', + calendar=calendar), + string_to_datetime(expected)) + + for calendar in ['gregorian', 'gregorian_noleap']: + + delta1 = string_to_relative_delta('0000-01-00', calendar=calendar) + delta2 = string_to_relative_delta('0000-00-01', calendar=calendar) + deltaSum = string_to_relative_delta('0000-01-01', + calendar=calendar) + # test MpasRelativeDelta + MpasRelativeDelta + self.assertEqual(delta1 + delta2, deltaSum) + # test MpasRelativeDelta - MpasRelativeDelta + self.assertEqual(deltaSum - delta2, delta1) + + # test MpasRelativeDelta(date1, date2) + date1 = string_to_datetime('0002-02-02') + date2 = string_to_datetime('0001-01-01') + delta = string_to_relative_delta('0001-01-01', calendar=calendar) + self.assertEqual(MpasRelativeDelta(dt1=date1, dt2=date2, + calendar=calendar), + delta) + + # test MpasRelativeDelta + datetime.datetime (an odd order but + # it's allowed...) 
+ date1 = string_to_datetime('0001-01-01') + delta = string_to_relative_delta('0001-01-01', calendar=calendar) + date2 = string_to_datetime('0002-02-02') + self.assertEqual(delta + date1, date2) + + # test multiplication/division by scalars + delta1 = string_to_relative_delta('0001-01-01', calendar=calendar) + delta2 = string_to_relative_delta('0002-02-02', calendar=calendar) + self.assertEqual(2*delta1, delta2) + self.assertEqual(delta2/2, delta1) + + # make sure there's an error when we try to add MpasRelativeDeltas + # with different calendars + with self.assertRaisesRegexp(ValueError, + 'MpasRelativeDelta objects can only be ' + 'added if their calendars match.'): + delta1 = string_to_relative_delta('0000-01-00', + calendar='gregorian') + delta2 = string_to_relative_delta('0000-00-01', + calendar='gregorian_noleap') + deltaSum = delta1 + delta2 + + def test_string_to_days_since_date(self): + referenceDate = '0001-01-01' + for calendar in ['gregorian', 'gregorian_noleap']: + for dateString, expected_days in [('0001-01-01', 0.), + ('0001-01-02', 1.), + ('0001-02-01', 31.), + ('0002-01-01', 365.)]: + days = string_to_days_since_date(dateString=dateString, + calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, expected_days) + + referenceDate = '2016-01-01' + for calendar, expected_days in [('gregorian', 366.), + ('gregorian_noleap', 365.)]: + days = string_to_days_since_date(dateString='2017-01-01', + calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, expected_days) + + def test_days_to_datetime(self): + referenceDate = '0001-01-01' + for calendar in ['gregorian', 'gregorian_noleap']: + for dateString, days in [('0001-01-01', 0.), + ('0001-01-02', 1.), + ('0001-02-01', 31.), + ('0002-01-01', 365.)]: + datetime = days_to_datetime(days=days, + calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(datetime, string_to_datetime(dateString)) + + referenceDate = '2016-01-01' + for calendar, days in [('gregorian', 366.), + ('gregorian_noleap', 365.)]: + datetime = days_to_datetime(days=days, + calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(datetime, string_to_datetime('2017-01-01')) + + def test_datetime_to_days(self): + referenceDate = '0001-01-01' + for calendar in ['gregorian', 'gregorian_noleap']: + for dateString, expected_days in [('0001-01-01', 0.), + ('0001-01-02', 1.), + ('0001-02-01', 31.), + ('0002-01-01', 365.)]: + days = datetime_to_days(dates=string_to_datetime(dateString), + calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, expected_days) + + referenceDate = '2016-01-01' + for calendar, expected_days in [('gregorian', 366.), + ('gregorian_noleap', 365.)]: + days = datetime_to_days(dates=string_to_datetime('2017-01-01'), + calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, expected_days) + + def test_date_to_days(self): + referenceDate = '0001-01-01' + for calendar in ['gregorian', 'gregorian_noleap']: + days = date_to_days(year=1, month=1, day=1, calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, 0.) + days = date_to_days(year=1, month=1, day=2, calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, 1.) + days = date_to_days(year=1, month=2, day=1, calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, 31.) + days = date_to_days(year=2, month=1, day=1, calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, 365.) 
+ + referenceDate = '2016-01-01' + for calendar, expected_days in [('gregorian', 366.), + ('gregorian_noleap', 365.)]: + days = date_to_days(year=2017, month=1, day=1, + calendar=calendar, + referenceDate=referenceDate) + self.assertEqual(days, expected_days) + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/run_analysis.py b/run_analysis.py old mode 100644 new mode 100755 index 493bdfbbc..25143aab6 --- a/run_analysis.py +++ b/run_analysis.py @@ -1,86 +1,101 @@ #!/usr/bin/env python """ -Runs MPAS-Analysis via configuration file `config.analysis` specifying analysis -options. +Runs MPAS-Analysis via a configuration file (e.g. `config.analysis`) +specifying analysis options. Author: Xylar Asay-Davis, Phillip J. Wolfram -Last Modified: 12/06/2016 +Last Modified: 03/23/2017 """ import os -import sys import matplotlib as mpl +import argparse from mpas_analysis.configuration.MpasAnalysisConfigParser \ import MpasAnalysisConfigParser -from mpas_analysis.ocean.variable_stream_map import oceanStreamMap, \ - oceanVariableMap -from mpas_analysis.sea_ice.variable_stream_map import seaIceStreamMap, \ - seaIceVariableMap - - -def path_existence(config, section, option, ignorestr=None): # {{{ - inpath = config.get(section, option) - if not (os.path.isdir(inpath) or os.path.isfile(inpath)): - # assumes that path locations of ignorestr won't return an error, e.g., - # ignorestr="none" is a key word to indicate the path or file is - # optional and is not needed - if inpath == ignorestr: - return False - errmsg = "Path %s not found. Exiting..." % inpath - raise SystemExit(errmsg) - return inpath # }}} - - -def makedirs(inpath): # {{{ - if not os.path.exists(inpath): - os.makedirs(inpath) - return inpath # }}} +def checkPathExists(path): # {{{ + """ + Raise an exception if the given path does not exist. + + Author: Xylar Asay-Davis + Last Modified: 02/02/2017 + """ + if not (os.path.isdir(path) or os.path.isfile(path)): + raise OSError('Path {} not found'.format(path)) +# }}} + + +def checkGenerate(config, analysisName, mpasCore, analysisCategory=None): + # {{{ + """ + determine if a particular analysis of a particular core and (optionally) + category should be generated. 
+ + Author: Xylar Asay-Davis + Last Modified: 02/02/2017 + """ + generateList = config.getExpression('output', 'generate') + generate = False + for element in generateList: + if '_' in element: + (prefix, suffix) = element.split('_', 1) + else: + prefix = element + suffix = None + + if prefix == 'all': + if (suffix in [mpasCore, analysisCategory]) or (suffix is None): + generate = True + elif prefix == 'no': + if suffix in [analysisName, mpasCore, analysisCategory]: + generate = False + elif element == analysisName: + generate = True + + return generate # }}} def analysis(config): # {{{ # set default values of start and end dates for climotologies and - # timeseries - if config.has_option('time', 'climo_yr1') and \ - config.has_option('time', 'climo_yr2'): - startDate = '{:04d}-01-01_00:00:00'.format( - config.getint('time', 'climo_yr1')) - endDate = '{:04d}-12-31_23:59:59'.format( - config.getint('time', 'climo_yr2')) - config.getWithDefault('time', 'climo_start_date', startDate) - config.getWithDefault('time', 'climo_end_date', endDate) - - if config.has_option('time', 'timeseries_yr1') and \ - config.has_option('time', 'timeseries_yr2'): + # timeseries and indices + for section in ['climatology', 'timeSeries', 'index']: startDate = '{:04d}-01-01_00:00:00'.format( - config.getint('time', 'timeseries_yr1')) + config.getint(section, 'startYear')) + if not config.has_option(section, 'startDate'): + config.set(section, 'startDate', startDate) endDate = '{:04d}-12-31_23:59:59'.format( - config.getint('time', 'timeseries_yr2')) - config.getWithDefault('time', 'timeseries_start_date', startDate) - config.getWithDefault('time', 'timeseries_end_date', endDate) + config.getint(section, 'endYear')) + if not config.has_option(section, 'endDate'): + config.set(section, 'endDate', endDate) # Checks on directory/files existence: - if config.get('case', 'ref_casename_v0') != 'None': - path_existence(config, 'paths', 'ref_archive_v0_ocndir') - path_existence(config, 'paths', 'ref_archive_v0_seaicedir') - - generate_seaice_timeseries = config.getboolean('seaice_timeseries', - 'generate') - seaice_compare_obs = config.getboolean('seaice_timeseries', - 'compare_with_obs') - generate_seaice_modelvsobs = config.getboolean('seaice_modelvsobs', - 'generate') - if (generate_seaice_timeseries and seaice_compare_obs) or \ - generate_seaice_modelvsobs: + if config.get('runs', 'preprocessedReferenceRunName') != 'None': + checkPathExists(config.get('oceanPreprocessedReference', + 'baseDirectory')) + checkPathExists(config.get('seaIcePreprocessedReference', + 'baseDirectory')) + + generateTimeSeriesSeaIce = checkGenerate( + config, analysisName='timeSeriesSeaIceAreaVol', mpasCore='seaIce', + analysisCategory='timeSeries') + compareTimeSeriesSeaIceWithObservations = config.getboolean( + 'timeSeriesSeaIceAreaVol', 'compareWithObservations') + generateRegriddedSeaIce = checkGenerate( + config, analysisName='regriddedSeaIceConcThick', mpasCore='seaIce', + analysisCategory='regriddedHorizontal') + + if ((generateTimeSeriesSeaIce and + compareTimeSeriesSeaIceWithObservations) or generateRegriddedSeaIce): # we will need sea-ice observations. 
Make sure they're there - for obsfile in ['obs_iceareaNH', 'obs_iceareaSH', 'obs_icevolNH', - 'obs_icevolSH']: - path_existence(config, 'seaIceData', obsfile, ignorestr='none') - - makedirs(config.get('paths', 'plots_dir')) + baseDirectory = config.get('seaIceObservations', 'baseDirectory') + for observationName in ['areaNH', 'areaSH', 'volNH', 'volSH']: + fileName = config.get('seaIceObservations', observationName) + if fileName.lower() == 'none': + continue + checkPathExists('{}/{}'.format(baseDirectory, fileName)) # choose the right rendering backend, depending on whether we're displaying # to the screen @@ -91,75 +106,80 @@ def analysis(config): # {{{ # analysis can only be imported after the right MPL renderer is selected # GENERATE OCEAN DIAGNOSTICS - if config.getboolean('ohc_timeseries', 'generate'): + if checkGenerate(config, analysisName='timeSeriesOHC', mpasCore='ocean', + analysisCategory='timeSeries'): print "" print "Plotting OHC time series..." from mpas_analysis.ocean.ohc_timeseries import ohc_timeseries - ohc_timeseries(config, streamMap=oceanStreamMap, - variableMap=oceanVariableMap) + ohc_timeseries(config) - if config.getboolean('sst_timeseries', 'generate'): + if checkGenerate(config, analysisName='timeSeriesSST', mpasCore='ocean', + analysisCategory='timeSeries'): print "" print "Plotting SST time series..." from mpas_analysis.ocean.sst_timeseries import sst_timeseries - sst_timeseries(config, streamMap=oceanStreamMap, - variableMap=oceanVariableMap) - - if config.getboolean('nino34_timeseries', 'generate'): - print "" - print "Plotting Nino3.4 time series..." - # from mpas_analysis.ocean.nino34_timeseries import nino34_timeseries - # nino34_timeseries(config) + sst_timeseries(config) - if config.getboolean('mht_timeseries', 'generate'): + if checkGenerate(config, analysisName='indexNino34', + mpasCore='ocean', analysisCategory='index'): print "" - print "Plotting Meridional Heat Transport (MHT)..." - # from mpas_analysis.ocean.mht_timeseries import mht_timeseries - # mht_timeseries(config) - - if config.getboolean('moc_timeseries', 'generate'): - print "" - print "Plotting Meridional Overturning Circulation (MOC)..." - # from mpas_analysis.ocean.moc_timeseries import moc_timeseries - # moc_timeseries(config) - - if config.getboolean('sst_modelvsobs', 'generate'): + print "Plotting Nino3.4 time series and power spectrum...." + from mpas_analysis.ocean.nino34_index import nino34_index + nino34_index(config) + +# if checkGenerate(config, analysisName='timeSeriesMHT', mpasCore='ocean', +# analysisCategory='timeSeries'): +# print "" +# print "Plotting Meridional Heat Transport (MHT)..." +# from mpas_analysis.ocean.mht_timeseries import mht_timeseries +# mht_timeseries(config) + + if checkGenerate(config, analysisName='regriddedSST', mpasCore='ocean', + analysisCategory='regriddedHorizontal'): print "" print "Plotting 2-d maps of SST climatologies..." from mpas_analysis.ocean.ocean_modelvsobs import ocn_modelvsobs - ocn_modelvsobs(config, 'sst', streamMap=oceanStreamMap, - variableMap=oceanVariableMap) + ocn_modelvsobs(config, 'sst') - if config.getboolean('mld_modelvsobs', 'generate'): + if checkGenerate(config, analysisName='regriddedMLD', mpasCore='ocean', + analysisCategory='regriddedHorizontal'): print "" print "Plotting 2-d maps of MLD climatologies..." 
from mpas_analysis.ocean.ocean_modelvsobs import ocn_modelvsobs - ocn_modelvsobs(config, 'mld', streamMap=oceanStreamMap, - variableMap=oceanVariableMap) + ocn_modelvsobs(config, 'mld') - if config.getboolean('sss_modelvsobs', 'generate'): + if checkGenerate(config, analysisName='regriddedSSS', mpasCore='ocean', + analysisCategory='regriddedHorizontal'): print "" print "Plotting 2-d maps of SSS climatologies..." from mpas_analysis.ocean.ocean_modelvsobs import ocn_modelvsobs - ocn_modelvsobs(config, 'sss', streamMap=oceanStreamMap, - variableMap=oceanVariableMap) + ocn_modelvsobs(config, 'sss') + if checkGenerate(config, analysisName='streamfunctionMOC', + mpasCore='ocean', + analysisCategory='streamfunctionMOC'): + print "" + print "Plotting streamfunction of Meridional Overturning Circulation (MOC)..." + from mpas_analysis.ocean.meridional_overturning_circulation \ + import moc_streamfunction + moc_streamfunction(config) # GENERATE SEA-ICE DIAGNOSTICS - if config.getboolean('seaice_timeseries', 'generate'): + if checkGenerate(config, analysisName='timeSeriesSeaIceAreaVol', + mpasCore='seaIce', analysisCategory='timeSeries'): print "" print "Plotting sea-ice area and volume time series..." from mpas_analysis.sea_ice.timeseries import seaice_timeseries - seaice_timeseries(config, streamMap=seaIceStreamMap, - variableMap=seaIceVariableMap) + seaice_timeseries(config) - if config.getboolean('seaice_modelvsobs', 'generate'): + if checkGenerate(config, analysisName='regriddedSeaIceConcThick', + mpasCore='seaIce', + analysisCategory='regriddedHorizontal'): print "" print "Plotting 2-d maps of sea-ice concentration and thickness " \ "climatologies..." from mpas_analysis.sea_ice.modelvsobs import seaice_modelvsobs - seaice_modelvsobs(config, streamMap=seaIceStreamMap, - variableMap=seaIceVariableMap) + seaice_modelvsobs(config) # GENERATE LAND-ICE DIAGNOSTICS @@ -170,14 +190,42 @@ def analysis(config): # {{{ if __name__ == "__main__": - # process command line arguments and run analysis from configuration - if len(sys.argv) <= 1: - print "usage: %s []" % sys.argv[0] - exit(1) + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument("-g", "--generate", dest="generate", + help="A list of analysis modules to generate " + "(nearly identical generate option in config file).", + metavar="ANALYSIS1[,ANALYSIS2,ANALYSIS3,...]") + parser.add_argument('configFiles', metavar='CONFIG', + type=str, nargs='+', help='config file') + args = parser.parse_args() + + for configFile in args.configFiles: + if not os.path.exists(configFile): + raise OSError('Config file {} not found.'.format(configFile)) + + # add config.default to cover default not included in the config files + # provided on the command line + defaultConfig = '{}/config.default'.format( + os.path.dirname(os.path.realpath(__file__))) + if os.path.exists(defaultConfig): + configFiles = [defaultConfig] + args.configFiles + else: + print 'WARNING: Did not find config.default. Assuming other config ' \ + 'file(s) contain a\nfull set of configuration options.' 
+ configFiles = args.configFiles - configFileNames = sys.argv[1:] config = MpasAnalysisConfigParser() - config.read(configFileNames) + config.read(configFiles) + + if args.generate: + # overwrite the 'generate' in config with a string that parses to + # a list of string + generateList = args.generate.split(',') + generateString = ', '.join(["'{}'".format(element) + for element in generateList]) + generateString = '[{}]'.format(generateString) + config.set('output', 'generate', generateString) analysis(config)
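For reference, a minimal sketch of how the new `generate` list is resolved by `checkGenerate` (a sketch only: it assumes it is run from the repository root so that `run_analysis` is importable, and reuses the analysis names and categories exercised in `test_run_analysis_utils.py` above):

```
# Sketch only: exercises checkGenerate with the same 'generate' syntax the
# tests above use; equivalent on the command line to
#   ./run_analysis.py config.analysis --generate all,no_ocean,all_timeSeries
from run_analysis import checkGenerate
from mpas_analysis.configuration.MpasAnalysisConfigParser \
    import MpasAnalysisConfigParser

config = MpasAnalysisConfigParser()
config.add_section('output')
config.set('output', 'generate', "['all', 'no_ocean', 'all_timeSeries']")

# 'all' enables everything, 'no_ocean' then disables the ocean analyses, and
# 'all_timeSeries' re-enables time-series analyses (including ocean ones)
print checkGenerate(config, analysisName='timeSeriesSST',
                    mpasCore='ocean', analysisCategory='timeSeries')
# -> True
print checkGenerate(config, analysisName='regriddedSST',
                    mpasCore='ocean', analysisCategory='regriddedHorizontal')
# -> False
```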