From 84837e84d298973e10786ad91b83e28965c9a9ac Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 30 Apr 2024 19:55:57 -0400 Subject: [PATCH 1/8] Get git hash url from gh actions + inject actions url by default --- .../data_management_transforms.py | 68 ++++++++++++++----- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/leap_data_management_utils/data_management_transforms.py b/leap_data_management_utils/data_management_transforms.py index cd8071b..2e12879 100644 --- a/leap_data_management_utils/data_management_transforms.py +++ b/leap_data_management_utils/data_management_transforms.py @@ -14,26 +14,58 @@ yaml = YAML(typ='safe') +def get_github_actions_url() -> str: + """Return the url of the gh action run""" + if os.getenv('GITHUB_ACTIONS') == 'true': + print('Running from within GH actions') + server_url = os.getenv('GITHUB_SERVER_URL') + repository = os.getenv('GITHUB_REPOSITORY') + run_id = os.getenv('GITHUB_RUN_ID') + commit_hash = os.getenv('GITHUB_SHA') + + if server_url and repository and run_id and commit_hash: + return f"{server_url}/{repository}/actions/runs/{run_id}" + else: + print("One or more environment variables are missing.") + return "none" -def get_github_commit_url() -> Optional[str]: +def get_github_commit_url() -> str: """Get the GitHub commit URL for the current commit""" # Get GitHub Server URL - github_server_url = 'https://github.com' - - # Get the repository's remote origin URL - try: - repo_origin_url = subprocess.check_output( - ['git', 'config', '--get', 'remote.origin.url'], text=True - ).strip() - - # Extract the repository path from the remote URL - repository_path = repo_origin_url.split('github.com/')[-1].replace('.git', '') - - # Get the current commit SHA - commit_sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], text=True).strip() - - # Construct the GitHub commit URL - git_url_hash = f'{github_server_url}/{repository_path}/commit/{commit_sha}' + + + # check if this is running from within a github action + if os.getenv('GITHUB_ACTIONS') == 'true': + print('Running from within GH actions') + server_url = os.getenv('GITHUB_SERVER_URL') + repository = os.getenv('GITHUB_REPOSITORY') + run_id = os.getenv('GITHUB_RUN_ID') + commit_hash = os.getenv('GITHUB_SHA') + + if server_url and repository and run_id and commit_hash: + git_url_hash = f"{server_url}/{repository}/commit/{commit_hash}" + else: + print("Could not construct git_url_hash. One or more environment variables are missing.") + git_url_hash = "none" + + else: + #TODO: If the above fails, maybe still try this? Even though that would be a really rare case? + print('Fallback: Calling git via subprocess') + github_server_url = 'https://github.com' + # Get the repository's remote origin URL + try: + repo_origin_url = subprocess.check_output( + ['git', 'config', '--get', 'remote.origin.url'], text=True + ).strip() + + # Extract the repository path from the remote URL + repository_path = repo_origin_url.split('github.com/')[-1].replace('.git', '') + + # Get the current commit SHA + commit_sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], text=True).strip() + + # Construct the GitHub commit URL + git_url_hash = f'{github_server_url}/{repository_path}/commit/{commit_sha}' # Output the GitHub commit URL return git_url_hash @@ -197,9 +229,11 @@ def __post_init__(self): if self.add_provenance: git_url_hash = get_github_commit_url() + gh_actions_url = get_github_actions_url() timestamp = datetime.now(timezone.utc).isoformat() provenance_dict = { 'pangeo_forge_build_git_hash': git_url_hash, + 'pangeo_forge_gh_actions_url': gh_actions_url 'pangeo_forge_build_timestamp': timestamp, } self.inject_attrs.update(provenance_dict) From 84d728b5e9932f6122079312b6468c5fe3987c51 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Apr 2024 23:56:27 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../data_management_transforms.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/leap_data_management_utils/data_management_transforms.py b/leap_data_management_utils/data_management_transforms.py index 2e12879..074fe2f 100644 --- a/leap_data_management_utils/data_management_transforms.py +++ b/leap_data_management_utils/data_management_transforms.py @@ -22,7 +22,7 @@ def get_github_actions_url() -> str: repository = os.getenv('GITHUB_REPOSITORY') run_id = os.getenv('GITHUB_RUN_ID') commit_hash = os.getenv('GITHUB_SHA') - + if server_url and repository and run_id and commit_hash: return f"{server_url}/{repository}/actions/runs/{run_id}" else: @@ -32,7 +32,7 @@ def get_github_actions_url() -> str: def get_github_commit_url() -> str: """Get the GitHub commit URL for the current commit""" # Get GitHub Server URL - + # check if this is running from within a github action if os.getenv('GITHUB_ACTIONS') == 'true': @@ -41,7 +41,7 @@ def get_github_commit_url() -> str: repository = os.getenv('GITHUB_REPOSITORY') run_id = os.getenv('GITHUB_RUN_ID') commit_hash = os.getenv('GITHUB_SHA') - + if server_url and repository and run_id and commit_hash: git_url_hash = f"{server_url}/{repository}/commit/{commit_hash}" else: @@ -57,13 +57,13 @@ def get_github_commit_url() -> str: repo_origin_url = subprocess.check_output( ['git', 'config', '--get', 'remote.origin.url'], text=True ).strip() - + # Extract the repository path from the remote URL repository_path = repo_origin_url.split('github.com/')[-1].replace('.git', '') - + # Get the current commit SHA commit_sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], text=True).strip() - + # Construct the GitHub commit URL git_url_hash = f'{github_server_url}/{repository_path}/commit/{commit_sha}' From 427da93ed1ea73f023ff277efcbb7b2731f94b82 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 30 Apr 2024 20:02:55 -0400 Subject: [PATCH 3/8] Update data_management_transforms.py --- leap_data_management_utils/data_management_transforms.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/leap_data_management_utils/data_management_transforms.py b/leap_data_management_utils/data_management_transforms.py index 074fe2f..cdf6d2b 100644 --- a/leap_data_management_utils/data_management_transforms.py +++ b/leap_data_management_utils/data_management_transforms.py @@ -28,6 +28,8 @@ def get_github_actions_url() -> str: else: print("One or more environment variables are missing.") return "none" + else: + return "none" def get_github_commit_url() -> str: """Get the GitHub commit URL for the current commit""" @@ -66,7 +68,9 @@ def get_github_commit_url() -> str: # Construct the GitHub commit URL git_url_hash = f'{github_server_url}/{repository_path}/commit/{commit_sha}' - + except Exception as e: + print(f"Getting git_url_hash failed with {e}") + git_url_hash = "none" # Output the GitHub commit URL return git_url_hash From 91e44a9c1738edc47d612279414e96fbf298e311 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 30 Apr 2024 20:49:09 -0400 Subject: [PATCH 4/8] Update data_management_transforms.py --- leap_data_management_utils/data_management_transforms.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/leap_data_management_utils/data_management_transforms.py b/leap_data_management_utils/data_management_transforms.py index cdf6d2b..93e7462 100644 --- a/leap_data_management_utils/data_management_transforms.py +++ b/leap_data_management_utils/data_management_transforms.py @@ -71,13 +71,8 @@ def get_github_commit_url() -> str: except Exception as e: print(f"Getting git_url_hash failed with {e}") git_url_hash = "none" - # Output the GitHub commit URL - return git_url_hash - - except subprocess.CalledProcessError as e: - print('Error executing Git command:', e) - return None - + # Output the GitHub commit URL + return git_url_hash def get_catalog_store_urls(catalog_yaml_path: str) -> dict[str, str]: with open(catalog_yaml_path) as f: From 3f1cd1d42660615c89051d9569528ac18747d144 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 30 Apr 2024 21:17:39 -0400 Subject: [PATCH 5/8] Update data_management_transforms.py --- leap_data_management_utils/data_management_transforms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/leap_data_management_utils/data_management_transforms.py b/leap_data_management_utils/data_management_transforms.py index 93e7462..c7968f7 100644 --- a/leap_data_management_utils/data_management_transforms.py +++ b/leap_data_management_utils/data_management_transforms.py @@ -232,7 +232,7 @@ def __post_init__(self): timestamp = datetime.now(timezone.utc).isoformat() provenance_dict = { 'pangeo_forge_build_git_hash': git_url_hash, - 'pangeo_forge_gh_actions_url': gh_actions_url + 'pangeo_forge_gh_actions_url': gh_actions_url, 'pangeo_forge_build_timestamp': timestamp, } self.inject_attrs.update(provenance_dict) From 77bb41477c9f8a4a59879acfafbe654d4406b672 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 01:17:44 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../data_management_transforms.py | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/leap_data_management_utils/data_management_transforms.py b/leap_data_management_utils/data_management_transforms.py index c7968f7..58ea6f7 100644 --- a/leap_data_management_utils/data_management_transforms.py +++ b/leap_data_management_utils/data_management_transforms.py @@ -14,6 +14,7 @@ yaml = YAML(typ='safe') + def get_github_actions_url() -> str: """Return the url of the gh action run""" if os.getenv('GITHUB_ACTIONS') == 'true': @@ -24,18 +25,18 @@ def get_github_actions_url() -> str: commit_hash = os.getenv('GITHUB_SHA') if server_url and repository and run_id and commit_hash: - return f"{server_url}/{repository}/actions/runs/{run_id}" + return f'{server_url}/{repository}/actions/runs/{run_id}' else: - print("One or more environment variables are missing.") - return "none" + print('One or more environment variables are missing.') + return 'none' else: - return "none" + return 'none' + def get_github_commit_url() -> str: """Get the GitHub commit URL for the current commit""" # Get GitHub Server URL - # check if this is running from within a github action if os.getenv('GITHUB_ACTIONS') == 'true': print('Running from within GH actions') @@ -45,13 +46,15 @@ def get_github_commit_url() -> str: commit_hash = os.getenv('GITHUB_SHA') if server_url and repository and run_id and commit_hash: - git_url_hash = f"{server_url}/{repository}/commit/{commit_hash}" + git_url_hash = f'{server_url}/{repository}/commit/{commit_hash}' else: - print("Could not construct git_url_hash. One or more environment variables are missing.") - git_url_hash = "none" + print( + 'Could not construct git_url_hash. One or more environment variables are missing.' + ) + git_url_hash = 'none' else: - #TODO: If the above fails, maybe still try this? Even though that would be a really rare case? + # TODO: If the above fails, maybe still try this? Even though that would be a really rare case? print('Fallback: Calling git via subprocess') github_server_url = 'https://github.com' # Get the repository's remote origin URL @@ -69,11 +72,12 @@ def get_github_commit_url() -> str: # Construct the GitHub commit URL git_url_hash = f'{github_server_url}/{repository_path}/commit/{commit_sha}' except Exception as e: - print(f"Getting git_url_hash failed with {e}") - git_url_hash = "none" + print(f'Getting git_url_hash failed with {e}') + git_url_hash = 'none' # Output the GitHub commit URL return git_url_hash + def get_catalog_store_urls(catalog_yaml_path: str) -> dict[str, str]: with open(catalog_yaml_path) as f: catalog_meta = yaml.load(f) From f441cd498133228a3a35a34e5de719ea4a37c0e1 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 30 Apr 2024 21:35:16 -0400 Subject: [PATCH 7/8] Update data_management_transforms.py --- leap_data_management_utils/data_management_transforms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/leap_data_management_utils/data_management_transforms.py b/leap_data_management_utils/data_management_transforms.py index 58ea6f7..39281c8 100644 --- a/leap_data_management_utils/data_management_transforms.py +++ b/leap_data_management_utils/data_management_transforms.py @@ -7,6 +7,7 @@ from typing import Optional import apache_beam as beam +import os import zarr from google.api_core.exceptions import NotFound from google.cloud import bigquery From d7fcdb1df5a281d728ea61965ce7ed26a28db5fc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 01:35:22 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- leap_data_management_utils/data_management_transforms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/leap_data_management_utils/data_management_transforms.py b/leap_data_management_utils/data_management_transforms.py index 39281c8..b953a88 100644 --- a/leap_data_management_utils/data_management_transforms.py +++ b/leap_data_management_utils/data_management_transforms.py @@ -1,13 +1,13 @@ # Note: All of this code was written by Julius Busecke and copied from this feedstock: # https://github.com/leap-stc/cmip6-leap-feedstock/blob/main/feedstock/recipe.py#L262 +import os import subprocess from dataclasses import dataclass from datetime import datetime, timezone from typing import Optional import apache_beam as beam -import os import zarr from google.api_core.exceptions import NotFound from google.cloud import bigquery