From 9c6e0ae8ad3fce0d86d199e65e63612299dbe9f6 Mon Sep 17 00:00:00 2001 From: Raymond Menzel Date: Tue, 9 Jul 2024 11:31:26 -0400 Subject: [PATCH 1/4] added pytest --- .github/workflows/ci-analysis.yml | 47 ++++----- .../freanalysis_aerosol/__init__.py | 27 ++---- .../freanalysis_clouds/__init__.py | 96 ++++++++----------- .../freanalysis_radiation/__init__.py | 25 ++--- test.py | 10 -- tests/mdtf_timeslice_catalog.py | 8 -- tests/mdtf_timeslice_catalog.yaml | 49 +++------- tests/test_freanalysis_clouds.py | 74 ++++++++++++++ 8 files changed, 162 insertions(+), 174 deletions(-) delete mode 100755 test.py delete mode 100644 tests/mdtf_timeslice_catalog.py create mode 100644 tests/test_freanalysis_clouds.py diff --git a/.github/workflows/ci-analysis.yml b/.github/workflows/ci-analysis.yml index 02dc144..97b60d8 100644 --- a/.github/workflows/ci-analysis.yml +++ b/.github/workflows/ci-analysis.yml @@ -1,4 +1,5 @@ -name: Python Package using Conda +# Installs the Python dependencies and runs the freanalysis_clouds plugin. 
+name: Test freanalysis_clouds plugin on: [push] @@ -7,42 +8,30 @@ jobs: runs-on: ubuntu-latest strategy: max-parallel: 5 - + matrix: + python-version: [3.9, 3.10, 3.11] steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.10 + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: - python-version: '3.10' - - name: Add conda to system path + python-version: ${{ matrix.python-version }} + - name: Add conda to the system path run: | # $CONDA is an environment variable pointing to the root of the miniconda directory echo $CONDA/bin >> $GITHUB_PATH - - name: Install intakebuilder + - name: Install dependencies run: | conda config --add channels noaa-gfdl conda config --append channels conda-forge conda install intakebuilder -c noaa-gfdl - - name: Download sample cloud fields mid_cld_amt, low_cld_amt, high_cld_amt - run: | - mkdir -p archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/ && cd archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/ && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.high_cld_amt.nc && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.low_cld_amt.nc && curl -O ftp://nomads.gfdl.noaa.gov/1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr/atmos.198001-198012.mid_cld_amt.nc - ls /home/runner/work/analysis-scripts-fork/analysis-scripts-fork/archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp/atmos/ts/monthly/1yr/ - # - name: 'Generate catalog' - # run: | - # $CONDA/bin/python tests/mdtf_timeslice_catalog.py - - name: 'Generate MDTF time-slice sample catalog with yaml' + python3 -m pip install --upgrade pip + cd analysis-scripts; pip 
install .; cd .. + cd figure_tools; pip install .; cd .. + cd freanalysis; pip install .; cd .. + cd freanalysis_clouds; pip install .; cd .. + pip install pytest + - name: Test freanalysis_clouds run: | - $CONDA/bin/python tests/mdtf_timeslice_catalog.py - - name: upload-artifacts1 - uses: actions/upload-artifact@v4 - with: - name: workflow-artifacts1 - path: | - gfdl_analysis_citest.csv - gfdl_analysis_citest.json - gfdl_analysis_citest.csv - gfdl_analysis_citest.json - - name: Download all workflow run artifacts - uses: actions/download-artifact@v4 - - + python3 -m pytest tests diff --git a/freanalysis_aerosol/freanalysis_aerosol/__init__.py b/freanalysis_aerosol/freanalysis_aerosol/__init__.py index ea0ae17..4531c28 100644 --- a/freanalysis_aerosol/freanalysis_aerosol/__init__.py +++ b/freanalysis_aerosol/freanalysis_aerosol/__init__.py @@ -13,10 +13,10 @@ class Metadata: activity_id: str = "dev" institution_id: str = "" - source_id: str = "" - experiment_id: str = "c96L65_am5f4b4r1-newrad_amip" - frequency: str = "monthly" - modeling_realm: str = "atmos_month_aer" + source_id: str = "am5" + experiment_id: str = "c96L65_am5f7b11r0_amip" + frequency: str = "P1M" + modeling_realm: str = "atmos" table_id: str = "" member_id: str = "na" grid_label: str = "" @@ -24,6 +24,7 @@ class Metadata: chunk_freq: str = "" platform: str = "" cell_methods: str = "" + chunk_freq: str = "P1Y" def catalog_search_args(self, name): return { @@ -34,15 +35,6 @@ def catalog_search_args(self, name): "variable_id": name, } - def catalog_key(self, name) -> str: - return ".".join([ - self.experiment_id, - self.frequency, - self.member_id, - self.modeling_realm, - name, - ]) - def variables(self): return { "black_carbon": "blk_crb", @@ -198,21 +190,22 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None): datasets = catalog.search( **self.metadata.catalog_search_args(variable) ).to_dataset_dict(progressbar=False) + dataset = list(datasets.values())[0] if 
name.endswith("column"): # Lon-lat maps. maps[name] = LonLatMap.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, time_method="annual mean", - year=2010, + year=1980, ) else: maps[name] = ZonalMeanMap.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, time_method="annual mean", - year=2010, + year=1980, invert_y_axis=True, ) diff --git a/freanalysis_clouds/freanalysis_clouds/__init__.py b/freanalysis_clouds/freanalysis_clouds/__init__.py index a693539..ef0db95 100644 --- a/freanalysis_clouds/freanalysis_clouds/__init__.py +++ b/freanalysis_clouds/freanalysis_clouds/__init__.py @@ -5,47 +5,23 @@ from analysis_scripts import AnalysisScript from figure_tools import Figure, LonLatMap import intake -import intake_esm @dataclass class Metadata: - activity_id: str = "dev" - institution_id: str = "" - source_id: str = "" - experiment_id: str = "c96L65_am5f7b11r0_amip" - frequency: str = "P1M" - modeling_realm: str = "atmos" - table_id: str = "" - member_id: str = "na" - grid_label: str = "" - temporal_subset: str = "" - chunk_freq: str = "" - platform: str = "" - cell_methods: str = "" - chunk_freq: str = "P1Y" - - def catalog_search_args(self, name): - return { - "experiment_id": self.experiment_id, - "frequency": self.frequency, - "member_id": self.member_id, - "modeling_realm": self.modeling_realm, - "variable_id": name, - } + """Helper class that stores the metadata needed by the plugin.""" + frequency: str = "monthly" + realm: str = "atmos" + + @staticmethod + def variables(): + """Helper function to make maintaining this script easier if the + catalog variable ids change. - def catalog_key(self, name) -> str: - return ".".join([ - self.source_id, - self.experiment_id, - self.frequency, - self.member_id, - self.modeling_realm, - name, - self.chunk_freq - ]) - - def variables(self): + Returns: + Dictionary mapping the names used in this scripts to the catalog + variable ids. 
+ """ return { "high_cloud_fraction": "high_cld_amt", "low_cloud_fraction": "low_cld_amt", @@ -53,8 +29,8 @@ def variables(self): } -class AerosolAnalysisScript(AnalysisScript): - """Aerosol analysis script. +class CloudAnalysisScript(AnalysisScript): + """Cloud analysis script. Attributes: description: Longer form description for the analysis. @@ -99,41 +75,47 @@ def requires(self): }, }) - def run_analysis(self, catalog, png_dir, reference_catalog=None): + def run_analysis(self, catalog, png_dir, reference_catalog=None, config={}): """Runs the analysis and generates all plots and associated datasets. Args: catalog: Path to a catalog. png_dir: Path to the directory where the figures will be made. reference_catalog: Path to a catalog of reference data. + config: Dictonary of catalog metadata. Will overwrite the + data defined in the Metadata helper class if they both + contain the same keys. Returns: A list of paths to the figures that were created. + + Raises: + ValueError if the catalog cannot be filtered correctly. """ - # Connect to the catalog and find the necessary datasets. + # Connect to the catalog. catalog = intake.open_esm_datastore(catalog) maps = {} for name, variable in self.metadata.variables().items(): - # Get the dataset out of the catalog. - args = self.metadata.catalog_search_args(variable) - - datasets = catalog.search( - **self.metadata.catalog_search_args(variable) - ).to_dataset_dict(progressbar=False) - - # Lon-lat maps. - maps[name] = LonLatMap.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], - variable, - time_method="annual mean", - year=1980, - ) - + # Filter the catalog down to a single dataset for each variable. 
+ query_params = {"variable_id": variable} + query_params.update(vars(self.metadata)) + query_params.update(config) + datasets = catalog.search(**query_params).to_dataset_dict(progressbar=False) + if len(datasets) != 1: + raise ValueError("could not filter the catalog down to a single dataset.") + dataset = list(datasets.values())[0] + + # Create Lon-lat maps. + maps[name] = LonLatMap.from_xarray_dataset(dataset, variable, year=1980, + time_method="annual mean") + + # Create the figure. figure = Figure(num_rows=3, num_columns=1, title="Cloud Fraction", size=(16, 10)) figure.add_map(maps["high_cloud_fraction"], "High Clouds", 1, colorbar_range= [0, 100]) figure.add_map(maps["middle_cloud_fraction"], "Middle Clouds", 2, colorbar_range=[0, 100]) figure.add_map(maps["low_cloud_fraction"], "Low Clouds", 3, colorbar_range=[0, 100]) - figure.save(Path(png_dir) / "cloud-fraction.png") - return [Path(png_dir) / "cloud-fraction.png",] + output = Path(png_dir) / "cloud-fraction.png" + figure.save(output) + return [output,] diff --git a/freanalysis_radiation/freanalysis_radiation/__init__.py b/freanalysis_radiation/freanalysis_radiation/__init__.py index 31e0eed..827cc18 100644 --- a/freanalysis_radiation/freanalysis_radiation/__init__.py +++ b/freanalysis_radiation/freanalysis_radiation/__init__.py @@ -15,9 +15,9 @@ class Metadata: activity_id: str = "dev" institution_id: str = "" - source_id: str = "" - experiment_id: str = "c96L65_am5f4b4r1-newrad_amip" - frequency: str = "monthly" + source_id: str = "am5" + experiment_id: str = "c96L65_am5f7b11r0_amip" + frequency: str = "P1M" modeling_realm: str = "atmos" table_id: str = "" member_id: str = "na" @@ -26,6 +26,7 @@ class Metadata: chunk_freq: str = "" platform: str = "" cell_methods: str = "" + chunk_freq: str = "P1Y" def catalog_search_args(self, name): return { @@ -36,15 +37,6 @@ def catalog_search_args(self, name): "variable_id": name, } - def catalog_key(self, name) -> str: - return ".".join([ - self.experiment_id, - 
self.frequency, - self.member_id, - self.modeling_realm, - name, - ]) - def variables(self): return { "rlds": "lwdn_sfc", @@ -257,22 +249,23 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None): datasets = catalog.search( **self.metadata.catalog_search_args(variable) ).to_dataset_dict(progressbar=False) + dataset = list(datasets.values())[0] # Lon-lat maps. maps[name] = LonLatMap.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, time_method="annual mean", - year=2010, + year=1980, ) if name == "rlut": anomalies[name] = AnomalyTimeSeries.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, ) timeseries[name] = GlobalMeanTimeSeries.from_xarray_dataset( - datasets[self.metadata.catalog_key(variable)], + dataset, variable, ) diff --git a/test.py b/test.py deleted file mode 100755 index 64b2bdd..0000000 --- a/test.py +++ /dev/null @@ -1,10 +0,0 @@ -from os import environ - -from freanalysis.plugins import list_plugins, plugin_requirements, run_plugin - - -name = "freanalysis_clouds" -reqs = plugin_requirements(name) -print(reqs) -catalog = environ["CATALOG_JSON"] -run_plugin(name, catalog, "pngs") diff --git a/tests/mdtf_timeslice_catalog.py b/tests/mdtf_timeslice_catalog.py deleted file mode 100644 index eeb00bc..0000000 --- a/tests/mdtf_timeslice_catalog.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python - -from scripts import gen_intake_gfdl -import sys - -sys.argv = ['input_path','--config', '/home/runner/work/analysis-scripts-fork/analysis-scripts-fork/tests/mdtf_timeslice_catalog.yaml'] -print(sys.argv) -gen_intake_gfdl.main() diff --git a/tests/mdtf_timeslice_catalog.yaml b/tests/mdtf_timeslice_catalog.yaml index b5e76b1..05325ed 100644 --- a/tests/mdtf_timeslice_catalog.yaml +++ b/tests/mdtf_timeslice_catalog.yaml @@ -1,40 +1,15 @@ -#what kind of directory structure to expect? 
-#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp -# the output_path_template is set as follows. -#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we -#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example -#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure -#this is a valid value in headerlist as well. -#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply NA in output_path_template -#for the fourth value. - -#catalog headers -#The headerlist is expected column names in your catalog/csv file. This is usually determined by the users in conjuction -#with the ESM collection specification standards and the appropriate workflows. - +# Catalog headers +# The headerlist is expected column names in your catalog/csv file. This is usually +# determined by the users in conjunction with the ESM collection specification standards +# and the appropriate workflows. headerlist: ["activity_id", "institution_id", "source_id", "experiment_id", - "frequency", "modeling_realm", "table_id", - "member_id", "grid_label", "variable_id", - "temporal_subset", "chunk_freq","grid_label","platform","dimensions","cell_methods","path"] - -#what kind of directory structure to expect? -#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp -# the output_path_template is set as follows. -#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we -#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example -#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'.
We make sure -#this is a valid value in headerlist as well. -#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply NA in output_path_template -#for the fourth value. - -output_path_template: ['NA','NA','NA','NA','NA','NA','NA','source_id','experiment_id','NA','platform','custom_pp','modeling_realm','cell_methods','frequency','chunk_freq'] - -output_file_template: ['modeling_realm','temporal_subset','variable_id'] + "frequency", "modeling_realm", "table_id", + "member_id", "grid_label", "variable_id", + "temporal_subset", "chunk_freq", "grid_label", "platform", + "dimensions", "cell_methods", "path"] -#OUTPUT FILE INFO is currently passed as command-line argument. -#We will revisit adding a csvfile, jsonfile and logfile configuration to the builder configuration file in the future. -#csvfile = #jsonfile = #logfile = +output_path_template: ['NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', + 'source_id', 'experiment_id', 'NA', 'platform', 'custom_pp', + 'modeling_realm', 'cell_methods', 'frequency', 'chunk_freq'] -####################################################### -input_path: "/home/runner/work/analysis-scripts-fork/analysis-scripts-fork/archive/oar.gfdl.mdtf/MDTF-examples/mdtf-time-slice-example/gfdl.ncrc5-deploy-prod-openmp/pp" #"ENTER INPUT PATH HERE" #Example: /Users/ar46/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/" -output_path: "gfdl_analysis_citest" #ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. 
This can also be an absolute path) +output_file_template: ['modeling_realm', 'temporal_subset', 'variable_id'] diff --git a/tests/test_freanalysis_clouds.py b/tests/test_freanalysis_clouds.py new file mode 100644 index 0000000..374b8fb --- /dev/null +++ b/tests/test_freanalysis_clouds.py @@ -0,0 +1,74 @@ +from ftplib import FTP +from os import chdir, environ +from pathlib import Path +from subprocess import run +from tempfile import TemporaryDirectory + +from freanalysis.plugins import list_plugins, plugin_requirements, run_plugin +from scripts import gen_intake_gfdl + + +def download_test_data(stem): + """Downloads test datasets from a FTP server. + + Args: + stem: Directory to create the directory tree inside. + + Returns: + Path to the directory that will be used as the root of the data catalog. + """ + # Create local directory tree with the appropriate directory structure. + catalog_root = Path(stem) / "archive" / "oar.gfdl.mdtf" / "MDTF-examples" / \ + "mdtf-time-slice-example" / "gfdl.ncrc5-deploy-prod-openmp" / "pp" + data_directory = catalog_root / "atmos" / "ts" / "monthly" / "1yr" + data_directory.mkdir(parents=True, exist_ok=True) + + # Download the datasets from the FTP server. + path = "1/oar.gfdl.mdtf/MDTF-examples/GFDL-CM4/data/atmos/ts/monthly/1yr" + with FTP("nomads.gfdl.noaa.gov") as ftp: + ftp.login() + ftp.cwd(path) + for variable in ["high_cld_amt", "mid_cld_amt", "low_cld_amt"]: + name = f"atmos.198001-198012.{variable}.nc" + ftp.retrbinary(f"RETR {name}", open(data_directory / name, "wb").write) + return catalog_root.resolve() + + +def create_data_catalog(path, output="data-catalog"): + """Creates a data catalog from a directory tree. + + Args: + path: Path to the catalog root. + output: Name of the data catalog that will be created. + + Returns: + Paths to the data catalog json and csv files. + """ + yaml_path = Path(__file__).resolve().parent / "mdtf_timeslice_catalog.yaml" + + # Hack to stop click from exiting. 
+ command = ["python3", "-m", "scripts.gen_intake_gfdl", str(path), output, + "--config", str(yaml_path)] + run(command, check=True) + return Path(f"{output}.json").resolve(), Path(f"{output}.csv").resolve() + + +def plugin(json, pngs_directory="pngs"): + """Run the plugin to create the figure. + + Args: + json: Path to the catalog json file. + pngs_directory: Directory to store the output in. + """ + name = "freanalysis_clouds" + reqs = plugin_requirements(name) + Path(pngs_directory).mkdir(parents=True, exist_ok=True) + run_plugin(name, json, pngs_directory) + + +def test_freanalysis_clouds(): + with TemporaryDirectory() as tmp: + chdir(Path(tmp)) + path = download_test_data(stem=tmp) + json, csv = create_data_catalog(path) + plugin("data-catalog.json") From a0fa57d9d15556bb55113846ae3297ab2bec630e Mon Sep 17 00:00:00 2001 From: Raymond Menzel Date: Tue, 9 Jul 2024 11:34:30 -0400 Subject: [PATCH 2/4] add quotes? --- .github/workflows/ci-analysis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-analysis.yml b/.github/workflows/ci-analysis.yml index 97b60d8..36e6b6d 100644 --- a/.github/workflows/ci-analysis.yml +++ b/.github/workflows/ci-analysis.yml @@ -9,7 +9,7 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: [3.9, 3.10, 3.11] + python-version: ['3.9', '3.10', '3.11'] steps: - name: Checkout repository uses: actions/checkout@v4 From ad0691a81fbbc248cb0a452d3a71e2974ebddca1 Mon Sep 17 00:00:00 2001 From: Raymond Menzel Date: Tue, 9 Jul 2024 11:51:46 -0400 Subject: [PATCH 3/4] try without conda --- .github/workflows/ci-analysis.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-analysis.yml b/.github/workflows/ci-analysis.yml index 36e6b6d..a9f8748 100644 --- a/.github/workflows/ci-analysis.yml +++ b/.github/workflows/ci-analysis.yml @@ -17,20 +17,15 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - - name: Add conda to 
the system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda config --add channels noaa-gfdl - conda config --append channels conda-forge - conda install intakebuilder -c noaa-gfdl python3 -m pip install --upgrade pip cd analysis-scripts; pip install .; cd .. cd figure_tools; pip install .; cd .. cd freanalysis; pip install .; cd .. cd freanalysis_clouds; pip install .; cd .. + git clone https://github.com/aradhakrishnanGFDL/CatalogBuilder catalogbuilder + cd catalogbuilder; pip install .; cd .. pip install pytest - name: Test freanalysis_clouds run: | From 3dc90235119ca31cfefc9c8fa2c672c3520e42f3 Mon Sep 17 00:00:00 2001 From: Raymond Menzel Date: Tue, 9 Jul 2024 12:13:23 -0400 Subject: [PATCH 4/4] further cleanup --- .../freanalysis_aerosol/__init__.py | 77 ++++++++----------- .../freanalysis_clouds/__init__.py | 4 +- tests/test_freanalysis_clouds.py | 2 +- 3 files changed, 35 insertions(+), 48 deletions(-) diff --git a/freanalysis_aerosol/freanalysis_aerosol/__init__.py b/freanalysis_aerosol/freanalysis_aerosol/__init__.py index 4531c28..bc8971a 100644 --- a/freanalysis_aerosol/freanalysis_aerosol/__init__.py +++ b/freanalysis_aerosol/freanalysis_aerosol/__init__.py @@ -6,36 +6,23 @@ from figure_tools import LonLatMap, zonal_mean_vertical_and_column_integrated_map, \ ZonalMeanMap import intake -import intake_esm @dataclass class Metadata: - activity_id: str = "dev" - institution_id: str = "" - source_id: str = "am5" - experiment_id: str = "c96L65_am5f7b11r0_amip" - frequency: str = "P1M" - modeling_realm: str = "atmos" - table_id: str = "" - member_id: str = "na" - grid_label: str = "" - temporal_subset: str = "" - chunk_freq: str = "" - platform: str = "" - cell_methods: str = "" - chunk_freq: str = "P1Y" - - def catalog_search_args(self, name): - return { - "experiment_id": self.experiment_id, - "frequency": self.frequency, - 
"member_id": self.member_id, - "modeling_realm": self.modeling_realm, - "variable_id": name, - } + """Helper class that stores the metadata needed by the plugin.""" + frequency: str = "monthly" + realm: str = "atmos" - def variables(self): + @staticmethod + def variables(): + """Helper function to make maintaining this script easier if the + catalog variable ids change. + + Returns: + Dictionary mapping the names used in this script to the catalog + variable ids. + """ return { "black_carbon": "blk_crb", "black_carbon_column": "blk_crb_col", @@ -81,6 +68,7 @@ def requires(self): "dimensions": { "lat": {"standard_name": "latitude"}, "lon": {"standard_name": "longitude"}, + "pfull": {"standard_name": "air_pressure"}, "time": {"standard_name": "time"} }, "varlist": { @@ -167,16 +155,22 @@ def requires(self): }, }) - def run_analysis(self, catalog, png_dir, reference_catalog=None): + def run_analysis(self, catalog, png_dir, reference_catalog=None, config={}): """Runs the analysis and generates all plots and associated datasets. Args: catalog: Path to a catalog. png_dir: Path to the directory where the figures will be made. reference_catalog: Path to a catalog of reference data. + config: Dictionary of catalog metadata. Will overwrite the + data defined in the Metadata helper class if they both + contain the same keys. Returns: A list of paths to the figures that were created. + + Raises: + ValueError if the catalog cannot be filtered correctly. """ # Connect to the catalog and find the necessary datasets. @@ -184,30 +178,23 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None): maps = {} for name, variable in self.metadata.variables().items(): - # Get the dataset out of the catalog. - args = self.metadata.catalog_search_args(variable) - - datasets = catalog.search( - **self.metadata.catalog_search_args(variable) - ).to_dataset_dict(progressbar=False) + # Filter the catalog down to a single dataset for each variable. 
+ query_params = {"variable_id": variable} + query_params.update(vars(self.metadata)) + query_params.update(config) + datasets = catalog.search(**query_params).to_dataset_dict(progressbar=False) + if len(list(datasets.values())) != 1: + raise ValueError("could not filter the catalog down to a single dataset.") dataset = list(datasets.values())[0] if name.endswith("column"): # Lon-lat maps. - maps[name] = LonLatMap.from_xarray_dataset( - dataset, - variable, - time_method="annual mean", - year=1980, - ) + maps[name] = LonLatMap.from_xarray_dataset(dataset, variable, year=1980, + time_method="annual mean") else: - maps[name] = ZonalMeanMap.from_xarray_dataset( - dataset, - variable, - time_method="annual mean", - year=1980, - invert_y_axis=True, - ) + maps[name] = ZonalMeanMap.from_xarray_dataset(dataset, variable, year=1980, + time_method="annual mean", + invert_y_axis=True) figure_paths = [] for name in self.metadata.variables().keys(): diff --git a/freanalysis_clouds/freanalysis_clouds/__init__.py b/freanalysis_clouds/freanalysis_clouds/__init__.py index ef0db95..3c3724f 100644 --- a/freanalysis_clouds/freanalysis_clouds/__init__.py +++ b/freanalysis_clouds/freanalysis_clouds/__init__.py @@ -19,7 +19,7 @@ def variables(): catalog variable ids change. Returns: - Dictionary mapping the names used in this scripts to the catalog + Dictionary mapping the names used in this script to the catalog variable ids. 
""" return { @@ -103,7 +103,7 @@ def run_analysis(self, catalog, png_dir, reference_catalog=None, config={}): query_params.update(vars(self.metadata)) query_params.update(config) datasets = catalog.search(**query_params).to_dataset_dict(progressbar=False) - if len(datasets) != 1: + if len(list(datasets.values())) != 1: raise ValueError("could not filter the catalog down to a single dataset.") dataset = list(datasets.values())[0] diff --git a/tests/test_freanalysis_clouds.py b/tests/test_freanalysis_clouds.py index 374b8fb..eb7b889 100644 --- a/tests/test_freanalysis_clouds.py +++ b/tests/test_freanalysis_clouds.py @@ -71,4 +71,4 @@ def test_freanalysis_clouds(): chdir(Path(tmp)) path = download_test_data(stem=tmp) json, csv = create_data_catalog(path) - plugin("data-catalog.json") + plugin(json)