From 9c6e0ae8ad3fce0d86d199e65e63612299dbe9f6 Mon Sep 17 00:00:00 2001 From: Raymond Menzel Date: Tue, 9 Jul 2024 11:31:26 -0400 Subject: [PATCH 1/4] added pytest --- .github/workflows/ci-analysis.yml | 47 ++++----- .../freanalysis_aerosol/__init__.py | 27 ++---- .../freanalysis_clouds/__init__.py | 96 ++++++++----------- .../freanalysis_radiation/__init__.py | 25 ++--- test.py | 10 -- tests/mdtf_timeslice_catalog.py | 8 -- tests/mdtf_timeslice_catalog.yaml | 49 +++------- tests/test_freanalysis_clouds.py | 74 ++++++++++++++ 8 files changed, 162 insertions(+), 174 deletions(-) delete mode 100755 test.py delete mode 100644 tests/mdtf_timeslice_catalog.py create mode 100644 tests/test_freanalysis_clouds.py diff --git a/.github/workflows/ci-analysis.yml b/.github/workflows/ci-analysis.yml index 02dc144..97b60d8 100644 --- a/.github/workflows/ci-analysis.yml +++ b/.github/workflows/ci-analysis.yml @@ -1,4 +1,5 @@ -name: Python Package using Conda +# Installs the Python dependencies and runs the freanalysis_clouds plugin. 
+name: Test freanalysis_clouds plugin on: [push] @@ -7,42 +8,30 @@ jobs: runs-on: ubuntu-latest strategy: max-parallel: 5 - + matrix: + python-version: [3.9, 3.10, 3.11] steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.10 + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: - python-version: '3.10' - - name: Add conda to system path + python-version: ${{ matrix.python-version }} + - name: Add conda to the system path run: | # $CONDA is an environment variable pointing to the root of the miniconda directory echo $CONDA/bin >> $GITHUB_PATH - - name: Install intakebuilder + - name: Install dependencies run: | conda config --add channels noaa-gfdl conda config --append channels conda-forge conda install intakebuilder -c noaa-gfdl - - name: Download sample cloud fields mid_cld_amt, low_cld_amt, high_cld_amt - run: | - mkdir -p archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/ && cd archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/ && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.high_cld_amt.nc && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.low_cld_amt.nc && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.mid_cld_amt.nc - ls /home/runner/work/analysis-scripts-fork/analysis-scripts-fork/archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/ - # - name: 'Generate catalog' - # run: | - # $CONDA/bin/python tests/mdtf_timeslice_catalog.py - - name: 'Generate MDTF time-slice sample catalog with yaml' + python3 -m pip install --upgrade pip + cd analysis-scripts; pip 
install .; cd .. + cd figure_tools; pip install .; cd .. + cd freanalysis; pip install .; cd .. + cd freanalysis_clouds; pip install .; cd .. + pip install pytest + - name: Test freanalysis_clouds run: | - $CONDA/bin/python tests/mdtf_timeslice_catalog.py - - name: upload-artifacts1 - uses: actions/upload-artifact@v4 - with: - name: workflow-artifacts1 - path: | - gfdl_analysis_citest.csv - gfdl_analysis_citest.json - gfdl_analysis_citest.csv - gfdl_analysis_citest.json - - name: Download all workflow run artifacts - uses: actions/download-artifact@v4 - - + python3 -m pytest tests diff --git a/freanalysis_aerosol/freanalysis_aerosol/__init__.py b/freanalysis_aerosol/freanalysis_aerosol/__init__.py index ea0ae17..4531c28 100644 --- a/freanalysis_aerosol/freanalysis_aerosol/__init__.py +++ b/freanalysis_aerosol/freanalysis_aerosol/__init__.py @@ -13,10 +13,10 @@ class Metadata: activity_id: str = "dev" institution_id: str = "" - source_id: str = "" - experiment_id: str = "c96L65_am5f4b4r1-newrad_amip" - frequency: str = "monthly" - modeling_realm: str = "atmos_month_aer" + source_id: str = "am5" + experiment_id: str = "c96L65_am5f7b11r0_amip" + frequency: str = "P1M" + modeling_realm: str = "atmos" table_id: str = "" member_id: str = "na" grid_label: str = "" @@ -24,6 +24,7 @@ class Metadata: chunk_freq: str = "" platform: str = "" cell_methods: str = "" + chunk_freq: str = "P1Y" def catalog_search_args(self, name): return { @@ -34,15 +35,6 @@ def catalog_search_args(self, name): "variable_id": name, } - def catalog_key(self, name) -> str: - return ".".join([ - self.experiment_id, - self.frequency, - self.member_id, - self.modeling_realm, - name, - ]) - def variables(self): return { "black_carbon": "blk_crb", @@ -198,21 +190,22 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None): datasets = catalog.search( **self.metadata.catalog_search_args(variable) ).to_dataset_dict(progressbar=False) + dataset = list(datasets.values())[0] if 
name.endswith("column"): # Lon-lat maps. maps[name] = LonLatMap.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, time_method="annual mean", - year=2010, + year=1980, ) else: maps[name] = ZonalMeanMap.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, time_method="annual mean", - year=2010, + year=1980, invert_y_axis=True, ) diff --git a/freanalysis_clouds/freanalysis_clouds/__init__.py b/freanalysis_clouds/freanalysis_clouds/__init__.py index a693539..ef0db95 100644 --- a/freanalysis_clouds/freanalysis_clouds/__init__.py +++ b/freanalysis_clouds/freanalysis_clouds/__init__.py @@ -5,47 +5,23 @@ from analysis_scripts import AnalysisScript from figure_tools import Figure, LonLatMap import intake -import intake_esm @dataclass class Metadata: - activity_id: str = "dev" - institution_id: str = "" - source_id: str = "" - experiment_id: str = "c96L65_am5f7b11r0_amip" - frequency: str = "P1M" - modeling_realm: str = "atmos" - table_id: str = "" - member_id: str = "na" - grid_label: str = "" - temporal_subset: str = "" - chunk_freq: str = "" - platform: str = "" - cell_methods: str = "" - chunk_freq: str = "P1Y" - - def catalog_search_args(self, name): - return { - "experiment_id": self.experiment_id, - "frequency": self.frequency, - "member_id": self.member_id, - "modeling_realm": self.modeling_realm, - "variable_id": name, - } + """Helper class that stores the metadata needed by the plugin.""" + frequency: str = "monthly" + realm: str = "atmos" + + @staticmethod + def variables(): + """Helper function to make maintaining this script easier if the + catalog variable ids change. - def catalog_key(self, name) -> str: - return ".".join([ - self.source_id, - self.experiment_id, - self.frequency, - self.member_id, - self.modeling_realm, - name, - self.chunk_freq - ]) - - def variables(self): + Returns: + Dictionary mapping the names used in this scripts to the catalog + variable ids. 
+ """ return { "high_cloud_fraction": "high_cld_amt", "low_cloud_fraction": "low_cld_amt", @@ -53,8 +29,8 @@ def variables(self): } -class AerosolAnalysisScript(AnalysisScript): - """Aerosol analysis script. +class CloudAnalysisScript(AnalysisScript): + """Cloud analysis script. Attributes: description: Longer form description for the analysis. @@ -99,41 +75,47 @@ def requires(self): }, }) - def run_analysis(self, catalog, png_dir, reference_catalog=None): + def run_analysis(self, catalog, png_dir, reference_catalog=None, config={}): """Runs the analysis and generates all plots and associated datasets. Args: catalog: Path to a catalog. png_dir: Path to the directory where the figures will be made. reference_catalog: Path to a catalog of reference data. + config: Dictonary of catalog metadata. Will overwrite the + data defined in the Metadata helper class if they both + contain the same keys. Returns: A list of paths to the figures that were created. + + Raises: + ValueError if the catalog cannot be filtered correctly. """ - # Connect to the catalog and find the necessary datasets. + # Connect to the catalog. catalog = intake.open_esm_datastore(catalog) maps = {} for name, variable in self.metadata.variables().items(): - # Get the dataset out of the catalog. - args = self.metadata.catalog_search_args(variable) - - datasets = catalog.search( - **self.metadata.catalog_search_args(variable) - ).to_dataset_dict(progressbar=False) - - # Lon-lat maps. - maps[name] = LonLatMap.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], - variable, - time_method="annual mean", - year=1980, - ) - + # Filter the catalog down to a single dataset for each variable. 
+ query_params = {"variable_id": variable} + query_params.update(vars(self.metadata)) + query_params.update(config) + datasets = catalog.search(**query_params).to_dataset_dict(progressbar=False) + if len(datasets) != 1: + raise ValueError("could not filter the catalog down to a single dataset.") + dataset = list(datasets.values())[0] + + # Create Lon-lat maps. + maps[name] = LonLatMap.from_xarray_dataset(dataset, variable, year=1980, + time_method="annual mean") + + # Create the figure. figure = Figure(num_rows=3, num_columns=1, title="Cloud Fraction", size=(16, 10)) figure.add_map(maps["high_cloud_fraction"], "High Clouds", 1, colorbar_range= [0, 100]) figure.add_map(maps["middle_cloud_fraction"], "Middle Clouds", 2, colorbar_range=[0, 100]) figure.add_map(maps["low_cloud_fraction"], "Low Clouds", 3, colorbar_range=[0, 100]) - figure.save(Path(png_dir) / "cloud-fraction.png") - return [Path(png_dir) / "cloud-fraction.png",] + output = Path(png_dir) / "cloud-fraction.png" + figure.save(output) + return [output,] diff --git a/freanalysis_radiation/freanalysis_radiation/__init__.py b/freanalysis_radiation/freanalysis_radiation/__init__.py index 31e0eed..827cc18 100644 --- a/freanalysis_radiation/freanalysis_radiation/__init__.py +++ b/freanalysis_radiation/freanalysis_radiation/__init__.py @@ -15,9 +15,9 @@ class Metadata: activity_id: str = "dev" institution_id: str = "" - source_id: str = "" - experiment_id: str = "c96L65_am5f4b4r1-newrad_amip" - frequency: str = "monthly" + source_id: str = "am5" + experiment_id: str = "c96L65_am5f7b11r0_amip" + frequency: str = "P1M" modeling_realm: str = "atmos" table_id: str = "" member_id: str = "na" @@ -26,6 +26,7 @@ class Metadata: chunk_freq: str = "" platform: str = "" cell_methods: str = "" + chunk_freq: str = "P1Y" def catalog_search_args(self, name): return { @@ -36,15 +37,6 @@ def catalog_search_args(self, name): "variable_id": name, } - def catalog_key(self, name) -> str: - return ".".join([ - self.experiment_id, - 
self.frequency, - self.member_id, - self.modeling_realm, - name, - ]) - def variables(self): return { "rlds": "lwdn_sfc", @@ -257,22 +249,23 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None): datasets = catalog.search( **self.metadata.catalog_search_args(variable) ).to_dataset_dict(progressbar=False) + dataset = list(datasets.values())[0] # Lon-lat maps. maps[name] = LonLatMap.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, time_method="annual mean", - year=2010, + year=1980, ) if name == "rlut": anomalies[name] = AnomalyTimeSeries.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, ) timeseries[name] = GlobalMeanTimeSeries.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, ) diff --git a/test.py b/test.py deleted file mode 100755 index 64b2bdd..0000000 --- a/test.py +++ /dev/null @@ -1,10 +0,0 @@ -from os import environ - -from freanalysis.plugins import list_plugins, plugin_requirements, run_plugin - - -name = "freanalysis_clouds" -reqs = plugin_requirements(name) -print(reqs) -catalog = environ["CATALOG_JSON"] -run_plugin(name, catalog, "pngs") diff --git a/tests/mdtf_timeslice_catalog.py b/tests/mdtf_timeslice_catalog.py deleted file mode 100644 index eeb00bc..0000000 --- a/tests/mdtf_timeslice_catalog.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python - -from scripts import gen_intake_gfdl -import sys - -sys.argv = ['input_path','--config', '/home/runner/work/analysis-scripts-fork/analysis-scripts-fork/tests/mdtf_timeslice_catalog.yaml'] -print(sys.argv) -gen_intake_gfdl.main() diff --git a/tests/mdtf_timeslice_catalog.yaml b/tests/mdtf_timeslice_catalog.yaml index b5e76b1..05325ed 100644 --- a/tests/mdtf_timeslice_catalog.yaml +++ b/tests/mdtf_timeslice_catalog.yaml @@ -1,40 +1,15 @@ -#what kind of directory structure to expect? 
-#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp -# the output_path_template is set as follows. -#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we -#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example -#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure -#this is a valid value in headerlist as well. -#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply NA in output_path_template -#for the fourth value. - -#catalog headers -#The headerlist is expected column names in your catalog/csv file. This is usually determined by the users in conjuction -#with the ESM collection specification standards and the appropriate workflows. - +# Catalog headers +# The headerlist is expected column names in your catalog/csv file. This is usually +# determined by the users in conjunction with the ESM collection specification standards +# and the appropriate workflows. headerlist: ["activity_id", "institution_id", "source_id", "experiment_id", - "frequency", "modeling_realm", "table_id", - "member_id", "grid_label", "variable_id", - "temporal_subset", "chunk_freq","grid_label","platform","dimensions","cell_methods","path"] - -#what kind of directory structure to expect? -#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp -# the output_path_template is set as follows. -#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we -#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example -#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'.
We make sure -#this is a valid value in headerlist as well. -#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply NA in output_path_template -#for the fourth value. - -output_path_template: ['NA','NA','NA','NA','NA','NA','NA','source_id','experiment_id','NA','platform','custom_pp','modeling_realm','cell_methods','frequency','chunk_freq'] - -output_file_template: ['modeling_realm','temporal_subset','variable_id'] + "frequency", "modeling_realm", "table_id", + "member_id", "grid_label", "variable_id", + "temporal_subset", "chunk_freq", "grid_label", "platform", + "dimensions", "cell_methods", "path"] -#OUTPUT FILE INFO is currently passed as command-line argument. -#We will revisit adding a csvfile, jsonfile and logfile configuration to the builder configuration file in the future. -#csvfile = #jsonfile = #logfile = +output_path_template: ['NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', + 'source_id', 'experiment_id', 'NA', 'platform', 'custom_pp', + 'modeling_realm', 'cell_methods', 'frequency', 'chunk_freq'] -####################################################### -input_path: "/home/runner/work/analysis-scripts-fork/analysis-scripts-fork/archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp" #"ENTER INPUT PATH HERE" #Example: /Users/ar46/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/" -output_path: "gfdl_analysis_citest" #ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. 
This can also be an absolute path) +output_file_template: ['modeling_realm', 'temporal_subset', 'variable_id'] diff --git a/tests/test_freanalysis_clouds.py b/tests/test_freanalysis_clouds.py new file mode 100644 index 0000000..374b8fb --- /dev/null +++ b/tests/test_freanalysis_clouds.py @@ -0,0 +1,74 @@ +from ftplib import FTP +from os import chdir, environ +from pathlib import Path +from subprocess import run +from tempfile import TemporaryDirectory + +from freanalysis.plugins import list_plugins, plugin_requirements, run_plugin +from scripts import gen_intake_gfdl + + +def download_test_data(stem): + """Downloads test datasets from a FTP server. + + Args: + stem: Directory to create the directory tree inside. + + Returns: + Path to the directory that will be used as the root of the data catalog. + """ + # Create local directory tree with the appropriate directory structure. + catalog_root = Path(stem) / "archive" / "oar.gfdl.mdtf" / "MDTF-examples" / \ + "mdtf-time-slice-example" / "gfdl.ncrc5-deploy-prod-openmp" / "pp" + data_directory = catalog_root / "atmos" / "ts" / "monthly" / "1yr" + data_directory.mkdir(parents=True, exist_ok=True) + + # Download the datasets from the FTP server. + path = "1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr" + with FTP("nomads.gfdl.noaa.gov") as ftp: + ftp.login() + ftp.cwd(path) + for variable in ["high_cld_amt", "mid_cld_amt", "low_cld_amt"]: + name = f"atmos.198001-198012.{variable}.nc" + ftp.retrbinary(f"RETR {name}", open(data_directory / name, "wb").write) + return catalog_root.resolve() + + +def create_data_catalog(path, output="data-catalog"): + """Creates a data catalog from a directory tree. + + Args: + path: Path to the catalog root. + output: Name of the data catalog that will be created. + + Returns: + Paths to the data catalog json and csv files. + """ + yaml_path = Path(__file__).resolve().parent / "mdtf_timeslice_catalog.yaml" + + # Hack to stop click from exiting. 
+ command = ["python3", "-m", "scripts.gen_intake_gfdl", str(path), output, + "--config", str(yaml_path)] + run(command, check=True) + return Path(f"{output}.json").resolve(), Path(f"{output}.csv").resolve() + + +def plugin(json, pngs_directory="pngs"): + """Run the plugin to create the figure. + + Args: + json: Path to the catalog json file. + pngs_directory: Directory to store the output in. + """ + name = "freanalysis_clouds" + reqs = plugin_requirements(name) + Path(pngs_directory).mkdir(parents=True, exist_ok=True) + run_plugin(name, json, pngs_directory) + + +def test_freanalysis_clouds(): + with TemporaryDirectory() as tmp: + chdir(Path(tmp)) + path = download_test_data(stem=tmp) + json, csv = create_data_catalog(path) + plugin("data-catalog.json") From a0fa57d9d15556bb55113846ae3297ab2bec630e Mon Sep 17 00:00:00 2001 From: Raymond Menzel Date: Tue, 9 Jul 2024 11:34:30 -0400 Subject: [PATCH 2/4] add quotes? --- .github/workflows/ci-analysis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-analysis.yml b/.github/workflows/ci-analysis.yml index 97b60d8..36e6b6d 100644 --- a/.github/workflows/ci-analysis.yml +++ b/.github/workflows/ci-analysis.yml @@ -9,7 +9,7 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: [3.9, 3.10, 3.11] + python-version: ['3.9', '3.10', '3.11'] steps: - name: Checkout repository uses: actions/checkout@v4 From ad0691a81fbbc248cb0a452d3a71e2974ebddca1 Mon Sep 17 00:00:00 2001 From: Raymond Menzel Date: Tue, 9 Jul 2024 11:51:46 -0400 Subject: [PATCH 3/4] try without conda --- .github/workflows/ci-analysis.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-analysis.yml b/.github/workflows/ci-analysis.yml index 36e6b6d..a9f8748 100644 --- a/.github/workflows/ci-analysis.yml +++ b/.github/workflows/ci-analysis.yml @@ -17,20 +17,15 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - - name: Add conda to 
the system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda config --add channels noaa-gfdl - conda config --append channels conda-forge - conda install intakebuilder -c noaa-gfdl python3 -m pip install --upgrade pip cd analysis-scripts; pip install .; cd .. cd figure_tools; pip install .; cd .. cd freanalysis; pip install .; cd .. cd freanalysis_clouds; pip install .; cd .. + git clone https://github.com/aradhakrishnanGFDL/CatalogBuilder catalogbuilder + cd catalogbuilder; pip install .; cd .. pip install pytest - name: Test freanalysis_clouds run: | From 3dc90235119ca31cfefc9c8fa2c672c3520e42f3 Mon Sep 17 00:00:00 2001 From: Raymond Menzel Date: Tue, 9 Jul 2024 12:13:23 -0400 Subject: [PATCH 4/4] further cleanup --- .../freanalysis_aerosol/__init__.py | 77 ++++++++----------- .../freanalysis_clouds/__init__.py | 4 +- tests/test_freanalysis_clouds.py | 2 +- 3 files changed, 35 insertions(+), 48 deletions(-) diff --git a/freanalysis_aerosol/freanalysis_aerosol/__init__.py b/freanalysis_aerosol/freanalysis_aerosol/__init__.py index 4531c28..bc8971a 100644 --- a/freanalysis_aerosol/freanalysis_aerosol/__init__.py +++ b/freanalysis_aerosol/freanalysis_aerosol/__init__.py @@ -6,36 +6,23 @@ from figure_tools import LonLatMap, zonal_mean_vertical_and_column_integrated_map, \ ZonalMeanMap import intake -import intake_esm @dataclass class Metadata: - activity_id: str = "dev" - institution_id: str = "" - source_id: str = "am5" - experiment_id: str = "c96L65_am5f7b11r0_amip" - frequency: str = "P1M" - modeling_realm: str = "atmos" - table_id: str = "" - member_id: str = "na" - grid_label: str = "" - temporal_subset: str = "" - chunk_freq: str = "" - platform: str = "" - cell_methods: str = "" - chunk_freq: str = "P1Y" - - def catalog_search_args(self, name): - return { - "experiment_id": self.experiment_id, - "frequency": self.frequency, - 
"member_id": self.member_id, - "modeling_realm": self.modeling_realm, - "variable_id": name, - } + """Helper class that stores the metadata needed by the plugin.""" + frequency: str = "monthly" + realm: str = "atmos" - def variables(self): + @staticmethod + def variables(): + """Helper function to make maintaining this script easier if the + catalog variable ids change. + + Returns: + Dictionary mapping the names used in this script to the catalog + variable ids. + """ return { "black_carbon": "blk_crb", "black_carbon_column": "blk_crb_col", @@ -81,6 +68,7 @@ def requires(self): "dimensions": { "lat": {"standard_name": "latitude"}, "lon": {"standard_name": "longitude"}, + "pfull": {"standard_name": "air_pressure"}, "time": {"standard_name": "time"} }, "varlist": { @@ -167,16 +155,22 @@ def requires(self): }, }) - def run_analysis(self, catalog, png_dir, reference_catalog=None): + def run_analysis(self, catalog, png_dir, reference_catalog=None, config={}): """Runs the analysis and generates all plots and associated datasets. Args: catalog: Path to a catalog. png_dir: Path to the directory where the figures will be made. reference_catalog: Path to a catalog of reference data. + config: Dictionary of catalog metadata. Will overwrite the + data defined in the Metadata helper class if they both + contain the same keys. Returns: A list of paths to the figures that were created. + + Raises: + ValueError if the catalog cannot be filtered correctly. """ # Connect to the catalog and find the necessary datasets. @@ -184,30 +178,23 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None): maps = {} for name, variable in self.metadata.variables().items(): - # Get the dataset out of the catalog. - args = self.metadata.catalog_search_args(variable) - - datasets = catalog.search( - **self.metadata.catalog_search_args(variable) - ).to_dataset_dict(progressbar=False) + # Filter the catalog down to a single dataset for each variable. 
+ query_params = {"variable_id": variable} + query_params.update(vars(self.metadata)) + query_params.update(config) + datasets = catalog.search(**query_params).to_dataset_dict(progressbar=False) + if len(list(datasets.values())) != 1: + raise ValueError("could not filter the catalog down to a single dataset.") dataset = list(datasets.values())[0] if name.endswith("column"): # Lon-lat maps. - maps[name] = LonLatMap.from_xarray_dataset( - dataset, - variable, - time_method="annual mean", - year=1980, - ) + maps[name] = LonLatMap.from_xarray_dataset(dataset, variable, year=1980, + time_method="annual mean") else: - maps[name] = ZonalMeanMap.from_xarray_dataset( - dataset, - variable, - time_method="annual mean", - year=1980, - invert_y_axis=True, - ) + maps[name] = ZonalMeanMap.from_xarray_dataset(dataset, variable, year=1980, + time_method="annual mean", + invert_y_axis=True) figure_paths = [] for name in self.metadata.variables().keys(): diff --git a/freanalysis_clouds/freanalysis_clouds/__init__.py b/freanalysis_clouds/freanalysis_clouds/__init__.py index ef0db95..3c3724f 100644 --- a/freanalysis_clouds/freanalysis_clouds/__init__.py +++ b/freanalysis_clouds/freanalysis_clouds/__init__.py @@ -19,7 +19,7 @@ def variables(): catalog variable ids change. Returns: - Dictionary mapping the names used in this scripts to the catalog + Dictionary mapping the names used in this script to the catalog variable ids. 
""" return { @@ -103,7 +103,7 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None, config={}): query_params.update(vars(self.metadata)) query_params.update(config) datasets = catalog.search(**query_params).to_dataset_dict(progressbar=False) - if len(datasets) != 1: + if len(list(datasets.values())) != 1: raise ValueError("could not filter the catalog down to a single dataset.") dataset = list(datasets.values())[0] diff --git a/tests/test_freanalysis_clouds.py b/tests/test_freanalysis_clouds.py index 374b8fb..eb7b889 100644 --- a/tests/test_freanalysis_clouds.py +++ b/tests/test_freanalysis_clouds.py @@ -71,4 +71,4 @@ def test_freanalysis_clouds(): chdir(Path(tmp)) path = download_test_data(stem=tmp) json, csv = create_data_catalog(path) - plugin("data-catalog.json") + plugin(json)