Skip to content

Commit

Permalink
Merge pull request #3 from NOAA-GFDL/add-pytest
Browse files Browse the repository at this point in the history
Add unit testing with pytest and update the github CI
  • Loading branch information
ceblanton authored Jul 12, 2024
2 parents 30ed7e7 + 3dc9023 commit 8ea5797
Show file tree
Hide file tree
Showing 8 changed files with 184 additions and 214 deletions.
54 changes: 19 additions & 35 deletions .github/workflows/ci-analysis.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: Python Package using Conda
# Installs the Python dependencies and runs the freanalysis_clouds plugin.
name: Test freanalysis_clouds plugin

on: [push]

Expand All @@ -7,42 +8,25 @@ jobs:
runs-on: ubuntu-latest
strategy:
max-parallel: 5

matrix:
python-version: ['3.9', '3.10', '3.11']
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.10
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: '3.10'
- name: Add conda to system path
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
- name: Install intakebuilder
run: |
conda config --add channels noaa-gfdl
conda config --append channels conda-forge
conda install intakebuilder -c noaa-gfdl
- name: Download sample cloud fields mid_cld_amt, low_cld_amt, high_cld_amt
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
mkdir -p archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/ && cd archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/ && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.high_cld_amt.nc && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.low_cld_amt.nc && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.mid_cld_amt.nc
ls /home/runner/work/analysis-scripts-fork/analysis-scripts-fork/archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/
# - name: 'Generate catalog'
# run: |
# $CONDA/bin/python tests/mdtf_timeslice_catalog.py
- name: 'Generate MDTF time-slice sample catalog with yaml'
python3 -m pip install --upgrade pip
cd analysis-scripts; pip install .; cd ..
cd figure_tools; pip install .; cd ..
cd freanalysis; pip install .; cd ..
cd freanalysis_clouds; pip install .; cd ..
git clone https://github.com/aradhakrishnanGFDL/CatalogBuilder catalogbuilder
cd catalogbuilder; pip install .; cd ..
pip install pytest
- name: Test freanalysis_clouds
run: |
$CONDA/bin/python tests/mdtf_timeslice_catalog.py
- name: upload-artifacts1
uses: actions/upload-artifact@v4
with:
name: workflow-artifacts1
path: |
gfdl_analysis_citest.csv
gfdl_analysis_citest.json
gfdl_analysis_citest.csv
gfdl_analysis_citest.json
- name: Download all workflow run artifacts
uses: actions/download-artifact@v4


python3 -m pytest tests
82 changes: 31 additions & 51 deletions freanalysis_aerosol/freanalysis_aerosol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,44 +6,23 @@
from figure_tools import LonLatMap, zonal_mean_vertical_and_column_integrated_map, \
ZonalMeanMap
import intake
import intake_esm


@dataclass
class Metadata:
activity_id: str = "dev"
institution_id: str = ""
source_id: str = ""
experiment_id: str = "c96L65_am5f4b4r1-newrad_amip"
"""Helper class that stores the metadata needed by the plugin."""
frequency: str = "monthly"
modeling_realm: str = "atmos_month_aer"
table_id: str = ""
member_id: str = "na"
grid_label: str = ""
temporal_subset: str = ""
chunk_freq: str = ""
platform: str = ""
cell_methods: str = ""

def catalog_search_args(self, name):
return {
"experiment_id": self.experiment_id,
"frequency": self.frequency,
"member_id": self.member_id,
"modeling_realm": self.modeling_realm,
"variable_id": name,
}
realm: str = "atmos"

def catalog_key(self, name) -> str:
return ".".join([
self.experiment_id,
self.frequency,
self.member_id,
self.modeling_realm,
name,
])
@staticmethod
def variables():
"""Helper function to make maintaining this script easier if the
catalog variable ids change.
def variables(self):
Returns:
Dictionary mapping the names used in this script to the catalog
variable ids.
"""
return {
"black_carbon": "blk_crb",
"black_carbon_column": "blk_crb_col",
Expand Down Expand Up @@ -89,6 +68,7 @@ def requires(self):
"dimensions": {
"lat": {"standard_name": "latitude"},
"lon": {"standard_name": "longitude"},
"pfull": {"standard_name": "air_pressure"},
"time": {"standard_name": "time"}
},
"varlist": {
Expand Down Expand Up @@ -175,46 +155,46 @@ def requires(self):
},
})

def run_analysis(self, catalog, png_dir, reference_catalog=None):
def run_analysis(self, catalog, png_dir, reference_catalog=None, config={}):
"""Runs the analysis and generates all plots and associated datasets.
Args:
catalog: Path to a catalog.
png_dir: Path to the directory where the figures will be made.
reference_catalog: Path to a catalog of reference data.
config: Dictionary of catalog metadata. Will overwrite the
data defined in the Metadata helper class if they both
contain the same keys.
Returns:
A list of paths to the figures that were created.
Raises:
ValueError if the catalog cannot be filtered correctly.
"""

# Connect to the catalog and find the necessary datasets.
catalog = intake.open_esm_datastore(catalog)

maps = {}
for name, variable in self.metadata.variables().items():
# Get the dataset out of the catalog.
args = self.metadata.catalog_search_args(variable)

datasets = catalog.search(
**self.metadata.catalog_search_args(variable)
).to_dataset_dict(progressbar=False)
# Filter the catalog down to a single dataset for each variable.
query_params = {"variable_id": variable}
query_params.update(vars(self.metadata))
query_params.update(config)
datasets = catalog.search(**query_params).to_dataset_dict(progressbar=False)
if len(list(datasets.values())) != 1:
raise ValueError("could not filter the catalog down to a single dataset.")
dataset = list(datasets.values())[0]

if name.endswith("column"):
# Lon-lat maps.
maps[name] = LonLatMap.from_xarray_dataset(
datasets[self.metadata.catalog_key(variable)],
variable,
time_method="annual mean",
year=2010,
)
maps[name] = LonLatMap.from_xarray_dataset(dataset, variable, year=1980,
time_method="annual mean")
else:
maps[name] = ZonalMeanMap.from_xarray_dataset(
datasets[self.metadata.catalog_key(variable)],
variable,
time_method="annual mean",
year=2010,
invert_y_axis=True,
)
maps[name] = ZonalMeanMap.from_xarray_dataset(dataset, variable, year=1980,
time_method="annual mean",
invert_y_axis=True)

figure_paths = []
for name in self.metadata.variables().keys():
Expand Down
96 changes: 39 additions & 57 deletions freanalysis_clouds/freanalysis_clouds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,56 +5,32 @@
from analysis_scripts import AnalysisScript
from figure_tools import Figure, LonLatMap
import intake
import intake_esm


@dataclass
class Metadata:
activity_id: str = "dev"
institution_id: str = ""
source_id: str = ""
experiment_id: str = "c96L65_am5f7b11r0_amip"
frequency: str = "P1M"
modeling_realm: str = "atmos"
table_id: str = ""
member_id: str = "na"
grid_label: str = ""
temporal_subset: str = ""
chunk_freq: str = ""
platform: str = ""
cell_methods: str = ""
chunk_freq: str = "P1Y"

def catalog_search_args(self, name):
return {
"experiment_id": self.experiment_id,
"frequency": self.frequency,
"member_id": self.member_id,
"modeling_realm": self.modeling_realm,
"variable_id": name,
}
"""Helper class that stores the metadata needed by the plugin."""
frequency: str = "monthly"
realm: str = "atmos"

@staticmethod
def variables():
"""Helper function to make maintaining this script easier if the
catalog variable ids change.
def catalog_key(self, name) -> str:
return ".".join([
self.source_id,
self.experiment_id,
self.frequency,
self.member_id,
self.modeling_realm,
name,
self.chunk_freq
])

def variables(self):
Returns:
Dictionary mapping the names used in this script to the catalog
variable ids.
"""
return {
"high_cloud_fraction": "high_cld_amt",
"low_cloud_fraction": "low_cld_amt",
"middle_cloud_fraction": "mid_cld_amt",
}


class AerosolAnalysisScript(AnalysisScript):
"""Aerosol analysis script.
class CloudAnalysisScript(AnalysisScript):
"""Cloud analysis script.
Attributes:
description: Longer form description for the analysis.
Expand Down Expand Up @@ -99,41 +75,47 @@ def requires(self):
},
})

def run_analysis(self, catalog, png_dir, reference_catalog=None):
def run_analysis(self, catalog, png_dir, reference_catalog=None, config={}):
"""Runs the analysis and generates all plots and associated datasets.
Args:
catalog: Path to a catalog.
png_dir: Path to the directory where the figures will be made.
reference_catalog: Path to a catalog of reference data.
            config: Dictionary of catalog metadata. Will overwrite the
data defined in the Metadata helper class if they both
contain the same keys.
Returns:
A list of paths to the figures that were created.
Raises:
ValueError if the catalog cannot be filtered correctly.
"""

# Connect to the catalog and find the necessary datasets.
# Connect to the catalog.
catalog = intake.open_esm_datastore(catalog)

maps = {}
for name, variable in self.metadata.variables().items():
# Get the dataset out of the catalog.
args = self.metadata.catalog_search_args(variable)

datasets = catalog.search(
**self.metadata.catalog_search_args(variable)
).to_dataset_dict(progressbar=False)

# Lon-lat maps.
maps[name] = LonLatMap.from_xarray_dataset(
datasets[self.metadata.catalog_key(variable)],
variable,
time_method="annual mean",
year=1980,
)

# Filter the catalog down to a single dataset for each variable.
query_params = {"variable_id": variable}
query_params.update(vars(self.metadata))
query_params.update(config)
datasets = catalog.search(**query_params).to_dataset_dict(progressbar=False)
if len(list(datasets.values())) != 1:
raise ValueError("could not filter the catalog down to a single dataset.")
dataset = list(datasets.values())[0]

# Create Lon-lat maps.
maps[name] = LonLatMap.from_xarray_dataset(dataset, variable, year=1980,
time_method="annual mean")

# Create the figure.
figure = Figure(num_rows=3, num_columns=1, title="Cloud Fraction", size=(16, 10))
figure.add_map(maps["high_cloud_fraction"], "High Clouds", 1, colorbar_range= [0, 100])
figure.add_map(maps["middle_cloud_fraction"], "Middle Clouds", 2, colorbar_range=[0, 100])
figure.add_map(maps["low_cloud_fraction"], "Low Clouds", 3, colorbar_range=[0, 100])
figure.save(Path(png_dir) / "cloud-fraction.png")
return [Path(png_dir) / "cloud-fraction.png",]
output = Path(png_dir) / "cloud-fraction.png"
figure.save(output)
return [output,]
25 changes: 9 additions & 16 deletions freanalysis_radiation/freanalysis_radiation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
class Metadata:
activity_id: str = "dev"
institution_id: str = ""
source_id: str = ""
experiment_id: str = "c96L65_am5f4b4r1-newrad_amip"
frequency: str = "monthly"
source_id: str = "am5"
experiment_id: str = "c96L65_am5f7b11r0_amip"
frequency: str = "P1M"
modeling_realm: str = "atmos"
table_id: str = ""
member_id: str = "na"
Expand All @@ -26,6 +26,7 @@ class Metadata:
chunk_freq: str = ""
platform: str = ""
cell_methods: str = ""
chunk_freq: str = "P1Y"

def catalog_search_args(self, name):
return {
Expand All @@ -36,15 +37,6 @@ def catalog_search_args(self, name):
"variable_id": name,
}

def catalog_key(self, name) -> str:
return ".".join([
self.experiment_id,
self.frequency,
self.member_id,
self.modeling_realm,
name,
])

def variables(self):
return {
"rlds": "lwdn_sfc",
Expand Down Expand Up @@ -257,22 +249,23 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None):
datasets = catalog.search(
**self.metadata.catalog_search_args(variable)
).to_dataset_dict(progressbar=False)
dataset = list(datasets.values())[0]

# Lon-lat maps.
maps[name] = LonLatMap.from_xarray_dataset(
datasets[self.metadata.catalog_key(variable)],
dataset,
variable,
time_method="annual mean",
year=2010,
year=1980,
)

if name == "rlut":
anomalies[name] = AnomalyTimeSeries.from_xarray_dataset(
datasets[self.metadata.catalog_key(variable)],
dataset,
variable,
)
timeseries[name] = GlobalMeanTimeSeries.from_xarray_dataset(
datasets[self.metadata.catalog_key(variable)],
dataset,
variable,
)

Expand Down
Loading

0 comments on commit 8ea5797

Please sign in to comment.