Merge branch 'main' into python312

valeriupredoi · Dec 4, 2023 · df47fc4 · df47fc4
2 parents f561ecf + b086386
commit df47fc4
Show file tree

Hide file tree

Showing 35 changed files with 1,305 additions and 53 deletions.
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
@@ -42,6 +42,7 @@ jobs:
           analysis_level3_sameYear --help
           analysis_p2p --help
           analysis_timeseries --help
+          batch_timeseries --help
           bgcval2_make_report --help
           download_from_mass --help
       - shell: bash -l {0}

diff --git a/.gitignore b/.gitignore
@@ -20,3 +20,8 @@ _sidebar.rst.inc
 local_test/BGC_data/valeriu
 mass_scripts
 CompareReports2
+.idea/workspace.xml
+*.iml
+.idea/inspectionProfiles/profiles_settings.xml
+.idea/misc.xml
+.idea/vcs.xml
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 [![made-with-python](https://img.shields.io/badge/Made%20with-Python-1f425f.svg)](https://www.python.org/)
 [![Github Actions Test](https://github.com/valeriupredoi/bgcval2/actions/workflows/run-tests.yml/badge.svg)](https://github.com/valeriupredoi/bgcval2/actions/workflows/run-tests.yml)
 
-![bgcval2logo](https://github.com/valeriupredoi/bgcval2/blob/main/doc/figures/BGCVal2-logo-2.png)
+![bgcval2logo](https://github.com/valeriupredoi/bgcval2/blob/main/doc/figures/bgcval2_logo_v_small.png)
 
 bgcval2
 =======
@@ -115,6 +115,7 @@ Executable name | What it does | Command
 `bgcval` | runs time series and point to point. | bgcval jobID
 `bgcval2_make_report` | makes the single model HTML report. | bgcval2_make_report jobID
 `analysis_compare` | runs comparison of multiple single jobs  | analysis_compare
+`batch_timeseries` | Submits single job time series analysis to slurm | batch_timeseries
 
 
 ### Checking out development branches
@@ -229,6 +230,7 @@ strictFileCheck: <bool>
 jobs:
    <jobID1>:
       description: <descrption of the first job>
+      label: 'Job info'
       colour: red
       thickness: 0.7
       linestyle: '-'
@@ -271,6 +273,10 @@ These values are:
    - The options are:
      - `description`:
        - A description of job, which helps people differentiate the jobs in the final report.
+     - `label`:
+       - A short description of the job which will appear in the plot legend.
+       - If you make it too long, it won't fit on the plot.  
+       - Optional: Default behaviour is the jobID.
      - `colour`:
        - The colour of this jobs line in the report plots.
        - Default colour is a randomly generated hex colour.
@@ -314,6 +320,50 @@ then the report will appear on the [JASMIN public facing page](https://gws-acces
 which is public facing but password protected.
 
 
+Batch times series Analysis
+===========================
+
+The `batch_timeseries` tool can take an `analysis_compare` input yaml file,
+and instead of running the time series analysis for each job on
+the interactive shell terminal in series, it uses slurm to submit
+each job as an independent job. 
+
+On jasmin, users can run up to five jobs simulataneously,
+so this can singnificantly boost the speed of the analysis. 
+
+The command to run it is:
+```
+batch_timeseries - y comparison_recipe.yml
+```
+
+This will submit a time-series analysis for each job, using a command which looks like this:
+```
+sbatch -J jobID --error=logs/jobID .err --output=logs/jobID .out lotus_timeseries.sh jobID  kmf physics bgc
+```
+The output and error messages will be in the `logs` directory with the jobID as the file prefix.
+The job name on slurm will also be the jobID, so it's easy to tell which jobs are running.
+The analysis suites will be appended as a list to the end of the command.
+In order to reduce the chance of analysing the same jobID twice, `batch_timeseries`
+checks whether a job exists, either currently running or in the queue before submitting.
+If a jobID exists, it is not re-submitted. However, this means that
+if two versions of the same jobID are submitted one after the other
+with different suite lists (`kmf`, `physics`, `bgc`), then only the first 
+set of suites will be run. 
+
+There is also an optional flag `-d` or `--dry_run` to test `batch_timeseries`, 
+which outputs the submission command to screen but does not submit the jobs.
+
+Note that this task does not run the `analysis_compare` suite so it will 
+not generate the html report. However, the html report can be generated more quickly
+with the `-s` argument to skip the `analysis_timeseries` section 
+described above. 
+
+In addition, note that this will not run the `download_from_mass`
+script, so jobs added here will not be included in the automated download.
+However, these jobs are added for automated download when `analysis_compare` 
+is used. 
+
+
 Downloading data using MASS
 ===========================
 
@@ -447,6 +497,8 @@ units: Units
 dimensions: 1,2 or  3 # The number of dimensions after the calculations are performed
 layers          : Surface
 regions         : Global ignoreInlandSeas SouthernOcean ArcticOcean Equator10 Remainder NorthernSubpolarAtlantic NorthernSubpolarPacific
+smoothings      : DataOnly both5 both30 movingav30years 5and30 30and100
+
 
 #paths:
 modelFiles      : $BASEDIR_MODEL/$JOBID/nemo_$JOBIDo_1y_*_grid-T.nc
@@ -500,6 +552,27 @@ basic functions such as multiply by or add to, or `noChange`, which can all be
 called without providing the `path`, and which may have their own key word
 arguments.
 
+Some of the other options for each analysis include:
+  - `layers`:
+    - The z axis layers that you want to use.
+    - Typically include things like `Surface`, `500m`, `1000m` etc.
+    - Note that this is a pre-curated list, so you can't use it to select a specific depth (like `327m` or something)
+  - `regions`:
+    - A list of regions to investigate.
+    - This is a pre-curated list, and they are defined in so you can't.
+    - These regions are defined in the file `bgcval2/bgcvaltools/makeMask.py`.
+  -  `smoothings`:
+    - This is the smoothing function (if any) to apply to the data before plotting it.
+    - The smoothing it added before plotting, but after saving the shelve file, so it doesn't impact the data.
+    - No smoothing is `DataOnly`, which is also the default behaviour. 
+    - If several smoothing options are added here, bgcval2 will generate a comparison plot for each one.
+    - The smoothings are defined and performed in `bgcval2/timeseries/timeseriesPlots.py`
+    - Other modes exist:
+      - `movingav30years`: A 30 year moving average
+      - `both5`: Both no smoothing and a 5 year moving average
+      - `both30`: Both no smoothing and a 30 year moving average
+      - `5and30`: a 5 year moving average and a 30 year moving average.
+
 Clearing the Cache
 ------------------
 

diff --git a/bgcval2/analysis_compare.py b/bgcval2/analysis_compare.py
@@ -189,6 +189,7 @@ def timeseries_compare(jobs,
                        jobDescriptions={},
                        lineThicknesses=defaultdict(lambda: 1),
                        linestyles=defaultdict(lambda: '-'),
+                       labels = {},
                        ensembles={},
                        config_user=None,
     ):
@@ -382,6 +383,7 @@ def timeseries_compare(jobs,
         regions = av[name]['regions']
         layers = av[name]['layers']
         metrics = av[name]['metrics']
+        smoothings = av[name]['smoothings']
         for region, layer, metric  in itertools.product(regions, layers, metrics):
             timesD = {}
             arrD = {}
@@ -405,20 +407,22 @@ def timeseries_compare(jobs,
             units = av[name]['modeldetails']['units']
 
             ts = 'Together'
-            for ls in ['DataOnly', ]:
+            for smoothing in smoothings:
+            #or ls in ['DataOnly', ]:
                 tsp.multitimeseries(
                     timesD,  # model times (in floats)
                     arrD,  # model time series
                     data=-999,  # in situ data distribution
                     title=title,
                     filename=bvt.folder(imageFolder) +
-                        '_'.join([name, region, layer, ts, ls + '.png']),
+                        '_'.join([name, region, layer, ts, smoothing + '.png']),
                     units=units,
                     plotStyle=ts,
-                    lineStyle=ls,
+                    smoothing=smoothing,
                     colours=colours,
                     thicknesses=lineThicknesses,
                     linestyles=linestyles,
+                    labels=labels,
                 )
 
     # Generate a list of comparison images:
@@ -515,7 +519,8 @@ def load_comparison_yml(master_compare_yml_fn):
     descriptions = {}
     shifttimes = {} # number of years to shift time axis.
     timeranges = {}
-
+    labels = {}
+
     for jobID, job_dict in details['jobs'].items():
         if job_dict.get('colour', False):
             colours[jobID] = job_dict['colour']
@@ -530,13 +535,16 @@ def load_comparison_yml(master_compare_yml_fn):
         timeranges[jobID] = job_dict.get('timerange', None)
         suites[jobID] = job_dict.get('suite', default_suite)
         auto_download_dict[jobID] = job_dict.get('auto_download', auto_download_dict[jobID]) 
+        labels[jobID] = job_dict.get('label', jobID)
+
 
     details['colours'] = colours
     details['descriptions'] = descriptions
     details['thicknesses'] = thicknesses
     details['linestyles'] = linestyles
     details['shifttimes'] = shifttimes
     details['timeranges'] = timeranges
+    details['labels'] = labels
     details['suites'] = suites
     details['auto_download'] = auto_download_dict
     return details
@@ -567,6 +575,7 @@ def load_yml_and_run(compare_yml, config_user, skip_timeseries):
     descriptions = details['descriptions']
     shifttimes = details['shifttimes']
     timeranges = details['timeranges']
+    labels = details['labels']
     suites = details['suites']
     auto_download = details['auto_download']
     strictFileCheck = details.get('strictFileCheck', True)
@@ -578,6 +587,7 @@ def load_yml_and_run(compare_yml, config_user, skip_timeseries):
         print(jobID, 'colour:',colours[jobID])
         print(jobID, 'line thickness & style:',thicknesses[jobID], linestyles[jobID])
         print(jobID, 'Shift time by', shifttimes[jobID])
+        print(jobID, 'Label: ', labels[jobID])
         print(jobID, 'Time range (None means all):', timeranges.get(jobID, None))
         print(jobID, 'suite:', suites[jobID])
         print(jobID, 'auto_download', auto_download[jobID])
@@ -620,6 +630,7 @@ def load_yml_and_run(compare_yml, config_user, skip_timeseries):
         analysisname=analysis_name,
         lineThicknesses=thicknesses,
         linestyles=linestyles,
+        labels=labels,
         config_user=config_user,
     )
 

diff --git a/bgcval2/analysis_timeseries.py b/bgcval2/analysis_timeseries.py
@@ -297,6 +297,11 @@ def load_key_file(key, paths, jobID, strictFileCheck=True):
     output_dict['metrics'] = key_dict.get('metrics', metricList)
     output_dict['metrics'] = parse_list_from_string(output_dict['metrics'])
 
+    # Smoothings:
+    default_smoothings = ['DataOnly', ]
+    output_dict['smoothings'] = key_dict.get('smoothings', default_smoothings)
+    output_dict['smoothings'] = parse_list_from_string(output_dict['smoothings'])
+
     # Load Grid:
     gridFile = key_dict.get('gridFile', paths.orcaGridfn)
     output_dict['gridFile'] = list_input_files(gridFile, key_dict, paths)[0]
@@ -728,7 +733,8 @@ def applyLandMask1e3(nc, keys):
                 gridFile=av[name]['gridFile'],
                 clean=False,
             )
-
+    print("analysis_timeseries:\tINFO:\tEnd of the timeseries analysis", jobID, suites)
+
 
 def get_args():
     """Parse command line arguments. """