try a container for e3sm-diags

mahf708 · mahf708 · commit 6992c1af0a35 · 2024-11-23T12:51:09.000-05:00
diff --git a/.github/workflows/e3sm-diags.yaml b/.github/workflows/e3sm-diags.yaml
@@ -0,0 +1,65 @@
+# Builds the container image defined in e3sm-diags/Dockerfile and, for every
+# event other than a pull request, pushes it to ghcr.io.
+name: e3sm-diags
+
+# Runs for merge-queue checks, for pull requests against main, and for pushes
+# to main; the pull_request and push triggers are limited to changes under
+# e3sm-diags/ or to this workflow file. Pushing an 'e3sm-diags-*' tag also
+# triggers the workflow.
+on:
+  merge_group:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    paths:
+      - 'e3sm-diags/**'
+      - '.github/workflows/e3sm-diags.yaml'
+  push:
+    branches: [ main ]
+    paths:
+      - 'e3sm-diags/**'
+      - '.github/workflows/e3sm-diags.yaml'
+    tags:
+      - 'e3sm-diags-*'
+
+jobs:
+  # NOTE(review): the job id "ocis" does not mention e3sm-diags - confirm the
+  # name is intentional.
+  ocis:
+    runs-on: ubuntu-latest
+    # packages: write lets GITHUB_TOKEN publish the image to ghcr.io.
+    # NOTE(review): no step visibly uses an OIDC token - confirm id-token: write
+    # is required.
+    permissions:
+      contents: read
+      packages: write
+      id-token: write
+    # NOTE(review): no matrix is defined, so fail-fast appears to have no
+    # effect here - confirm.
+    strategy:
+      fail-fast: false
+
+    steps:
+      - 
+        name: Checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          show-progress: false
+          persist-credentials: false
+      -
+        # Log in to the GitHub container registry with the workflow token.
+        # NOTE(review): this step also runs on pull requests, where nothing is
+        # pushed - confirm the login is wanted there.
+        name: GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      -
+        # Produces the tags and labels consumed by the Push step below for the
+        # image ghcr.io/<owner>/<repo>-e3sm-diags.
+        name: Meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ghcr.io/${{ github.repository }}-e3sm-diags
+      -
+        # NOTE(review): the Push step passes no 'platforms' input - confirm
+        # that QEMU emulation is actually needed.
+        name: QEMU
+        uses: docker/setup-qemu-action@v3
+      -
+        name: Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        # Always build the image; push it only when the triggering event is
+        # not a pull request.
+        name: Push
+        uses: docker/build-push-action@v6
+        with:
+          context: e3sm-diags/
+          file: e3sm-diags/Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/e3sm-diags/Dockerfile b/e3sm-diags/Dockerfile
@@ -0,0 +1,9 @@
+# Image that downloads the e3sm_diags integration test data at build time so
+# the data is stored inside the image itself.
+# NOTE(review): the base image tag is not pinned, so rebuilds follow whatever
+# "python" currently resolves to - consider pinning a version.
+FROM python
+
+# Pre-create the directory that download_files.py uses as its download root.
+RUN mkdir -p /e3sm_diags_downloaded_data/tests/integration
+
+# Copy the downloader into the image, make it executable, and run it during
+# the build (this requires network access while building).
+COPY download_files.py /app/download_files.py
+RUN chmod +x /app/download_files.py
+RUN /app/download_files.py
+
+# Start bash as a login shell, with /etc/profile given as its rc file.
+ENTRYPOINT ["/bin/bash", "--rcfile", "/etc/profile", "-l"]
diff --git a/e3sm-diags/download_files.py b/e3sm-diags/download_files.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+import os
+import re
+import urllib.request
+from typing import List
+
+# Local directory under which download() places everything it fetches.
+TEST_ROOT_PATH = "/e3sm_diags_downloaded_data/tests/integration/"
+# Name of the remote directory with the integration test data; download() also
+# uses it as the local directory name below TEST_ROOT_PATH.
+TEST_DATA_DIR = "integration_test_data"
+# Name of the remote directory fetched from the ".../integration/expected" URL;
+# download() also uses it as the local directory name below TEST_ROOT_PATH.
+TEST_IMAGES_DIR = "integration_test_images"
+
+
+# https://stackoverflow.com/questions/49113616/how-to-download-file-using-python
+def retrieve_file(url, file_path):
+    dir_path = os.path.join(*os.path.split(file_path)[:-1])
+    # https://stackoverflow.com/questions/12517451/automatically-creating-directories-with-file-output
+    if dir_path:
+        os.makedirs(dir_path, exist_ok=True)
+    print("Downloading {} to {}".format(url, file_path))
+    urllib.request.urlretrieve(url, file_path)
+    return file_path
+
+
+def download_files(url_prefix, url_suffix, directory_prefix=None):
+    print(f"Downloading {url_suffix}")
+    print("url_prefix={}".format(url_prefix))
+    print("url_suffix={}".format(url_suffix))
+    print("(local) directory_prefix={}".format(directory_prefix))
+
+    url = os.path.join(url_prefix, url_suffix)
+
+    if directory_prefix:
+        links_file_path = os.path.join(directory_prefix, url_suffix)
+    else:
+        links_file_path = url_suffix
+
+    links_file_path = "{}.html".format(links_file_path)
+    print(
+        "Downloading files from {}; checking for links on {}".format(
+            url, links_file_path
+        )
+    )
+    html_path = retrieve_file(url, links_file_path)
+    links: List[str] = []
+
+    with open(html_path, "r") as html:
+        for line in html:
+            match_object = re.search(r'href=[\'"]?([^\'" >]+)', line)
+            if match_object:
+                link = match_object.group(1)
+                # Ignore parent directory and sorting links
+                if (
+                    ("../" not in link)
+                    and (not link.startswith("/"))
+                    and ("?" not in link)
+                ):
+                    print("Found a link: {}".format(link))
+                    links.append(link)
+
+    if os.path.exists(links_file_path):
+        os.remove(links_file_path)
+
+    files = []
+    directories = []
+
+    for link in links:
+        if link.endswith("/"):
+            # List directories to download.
+            directories.append(link)
+        else:
+            # List '.csv', '.mat', '.nc', and '.png' files to download.
+            files.append(link)
+
+    print("\n###Downloading files")
+
+    if directory_prefix:
+        new_directory_prefix = os.path.join(directory_prefix, url_suffix)
+    else:
+        new_directory_prefix = url_suffix
+    for f in files:
+        url_to_download = os.path.join(url, f)
+        file_path = os.path.join(new_directory_prefix, f)
+        retrieve_file(url_to_download, file_path)
+
+    print("\n###Downloading directories")
+    for d in directories:
+        new_directory = d.rstrip("/")
+        download_files(url, new_directory, directory_prefix=new_directory_prefix)
+
+
+def download():
+    download_files(
+        "https://web.lcrc.anl.gov/public/e3sm/e3sm_diags_test_data/integration",
+        TEST_DATA_DIR,
+        directory_prefix=TEST_ROOT_PATH,
+    )
+    download_files(
+        "https://web.lcrc.anl.gov/public/e3sm/e3sm_diags_test_data/integration/expected",
+        TEST_IMAGES_DIR,
+        directory_prefix=TEST_ROOT_PATH,
+    )
+    print(f"Downloaded {TEST_DATA_DIR} and {TEST_ROOT_PATH}")
+
+
+# Run the download only when this file is executed as a script, not when it
+# is imported.
+if __name__ == "__main__":
+    download()
diff --git a/e3sm-diags/readme b/e3sm-diags/readme
@@ -0,0 +1 @@
+A container housing test data for e3sm_diags

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+A container housing test data for e3sm_diags`