diff --git a/.github/workflows/unittest-workflow.yaml b/.github/workflows/unittest-workflow.yaml
new file mode 100644
index 0000000..10c9cb0
--- /dev/null
+++ b/.github/workflows/unittest-workflow.yaml
@@ -0,0 +1,36 @@
+name: Unittest deep-code
+
+on:
+  push:
+  release:
+    types: [published]
+
+jobs:
+  unittest:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout deep-code
+        uses: actions/checkout@v4
+
+      - name: Set up MicroMamba
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: environment.yml
+
+      - name: Install deep-code in editable mode
+        shell: bash -l {0}
+        run: |
+          cd /home/runner/work/deep-code/deep-code
+          pip install -e .
+
+      - name: Run unit tests
+        shell: bash -l {0}
+        run: |
+          cd /home/runner/work/deep-code/deep-code
+          pytest --cov=deep_code --cov-report=xml
+
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          slug: deepesdl/deep-code
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 82f9275..4df4d63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,7 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+# Exclude sensitive configuration files from version control
+.gitaccess
+dataset-config.yaml
\ No newline at end of file
diff --git a/README.md b/README.md
index 405e0f2..1d4b381 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,92 @@
-# deep-code
\ No newline at end of file
+# deep-code
+
+[![Build Status](https://github.com/deepesdl/deep-code/actions/workflows/unittest-workflow.yaml/badge.svg)](https://github.com/deepesdl/deep-code/actions/workflows/unittest-workflow.yaml)
+[![codecov](https://codecov.io/gh/deepesdl/deep-code/graph/badge.svg?token=47MQXOXWOK)](https://codecov.io/gh/deepesdl/deep-code)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![License](https://img.shields.io/github/license/dcs4cop/xcube-smos)](https://github.com/deepesdl/deep-code/blob/main/LICENSE)
+
+`deep-code` is a lightweight Python tool comprising a command-line interface (CLI)
+and a Python API, providing utilities that aid the integration of DeepESDL
+datasets and experiments with EarthCODE.
+
+## Setup
+
+### Install
+
+`deep-code` will be available on PyPI and conda-forge. Until the first stable
+release, developers and contributors can follow the steps below to install
+deep-code.
+
+### Installing from the repository for developers
+
+To install deep-code directly from the git repository, clone the repository and
+execute the steps below:
+
+```commandline
+conda env create
+conda activate deep-code
+pip install -e .
+```
+
+This installs all the dependencies of `deep-code` into a fresh conda environment,
+and installs deep-code from the repository into the same environment.
+
+## Testing
+
+To run the unit test suite:
+
+```commandline
+pytest
+```
+
+To analyze test coverage:
+
+```commandline
+pytest --cov=deep_code
+```
+
+To produce an HTML coverage report:
+
+```commandline
+pytest --cov-report html --cov=deep_code
+```
+
+## deep_code usage
+
+`deep_code` provides a command-line tool called `deep-code`, which has several
+subcommands providing different utility functions.
+Use the `--help` option with these subcommands to get more details on usage.
+
+### deep-code publish-dataset
+
+Publishes a dataset that results from an experiment to the EarthCODE
+open-science catalog.
+
+```commandline
+deep-code publish-dataset /path/to/dataset-config.yaml
+```
+
+#### .gitaccess example
+
+```yaml
+github-username: your-git-user
+github-token: your-personal-access-token
+```
+
+#### dataset-config.yaml example
+
+```yaml
+dataset-id: hydrology-1D-0.009deg-100x60x60-3.0.2.zarr
+collection-id: hydrology
+
+# non-mandatory
+documentation-link: https://deepesdl.readthedocs.io/en/latest/datasets/hydrology-1D-0-009deg-100x60x60-3-0-2-zarr/
+access-link: s3://test
+dataset-status: completed
+dataset-region: global
+dataset-theme: ["ocean", "environment"]
+cf-parameter: [{"name": "hydrology"}]
+```
+
+The dataset-id must be a valid dataset ID from the `deep-esdl-public` S3 bucket
+or your team's bucket.
\ No newline at end of file
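The README above covers the CLI route; the same flow is available through the Python API that this PR adds in `deep_code.tools.publish`. A minimal sketch — it assumes a `.gitaccess` file in the current working directory and a dataset-config.yaml as shown above:

```python
from deep_code.tools.publish import DatasetPublisher

# Reads GitHub credentials from ./.gitaccess (see the README example above).
publisher = DatasetPublisher()

# Builds the STAC collection for the dataset and opens a pull request
# against the EarthCODE open-science-catalog metadata repository.
publisher.publish_dataset(dataset_config_path="dataset-config.yaml")
```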
diff --git a/deep_code/__init__.py b/deep_code/__init__.py
index 451592c..ff01dbd 100644
--- a/deep_code/__init__.py
+++ b/deep_code/__init__.py
@@ -1,5 +1,5 @@
 # The MIT License (MIT)
-# Copyright (c) 2024 by the xcube development team and contributors
+# Copyright (c) 2024 by DeepESDL and Brockmann Consult GmbH
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
@@ -21,4 +21,4 @@
 
 from .version import version
 
-__version__ = version
\ No newline at end of file
+__version__ = version
diff --git a/deep_code/api/__init__.py b/deep_code/api/__init__.py
deleted file mode 100644
index 96d27c6..0000000
--- a/deep_code/api/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2024 by xcube team and contributors
-# Permissions are hereby granted under the terms of the MIT License:
-# https://opensource.org/licenses/MIT.
\ No newline at end of file
diff --git a/deep_code/api/check_repository.py b/deep_code/api/check_repository.py
deleted file mode 100644
index e69de29..0000000
diff --git a/deep_code/api/new.py b/deep_code/api/new.py
deleted file mode 100644
index b5a382d..0000000
--- a/deep_code/api/new.py
+++ /dev/null
@@ -1 +0,0 @@
-# Logic for initializing repositories
\ No newline at end of file
diff --git a/deep_code/api/publish_experiments.py b/deep_code/api/publish_experiments.py
deleted file mode 100644
index 8586380..0000000
--- a/deep_code/api/publish_experiments.py
+++ /dev/null
@@ -1 +0,0 @@
-# Logic for publishing experiments on EarthCODE catalog
\ No newline at end of file
diff --git a/deep_code/api/publish_products.py b/deep_code/api/publish_products.py
deleted file mode 100644
index a3afe4a..0000000
--- a/deep_code/api/publish_products.py
+++ /dev/null
@@ -1 +0,0 @@
-# Logic for publishing products on EarthCODE catalog
\ No newline at end of file
diff --git a/deep_code/api/setup_ci.py b/deep_code/api/setup_ci.py
deleted file mode 100644
index 41c5846..0000000
--- a/deep_code/api/setup_ci.py
+++ /dev/null
@@ -1 +0,0 @@
-# Logic for setting up build pipelines
\ No newline at end of file
diff --git a/deep_code/api/test.py b/deep_code/api/test.py
deleted file mode 100644
index e69de29..0000000
diff --git a/deep_code/cli/__init__.py b/deep_code/cli/__init__.py
new file mode 100644
index 0000000..073ddd0
--- /dev/null
+++ b/deep_code/cli/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
diff --git a/deep_code/cli/main.py b/deep_code/cli/main.py
new file mode 100644
index 0000000..be88985
--- /dev/null
+++ b/deep_code/cli/main.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
+
+import click
+
+from deep_code.cli.publish import publish_dataset
+
+
+@click.group()
+def main():
+    """Deep Code CLI."""
+    pass
+
+
+main.add_command(publish_dataset)
+if __name__ == "__main__":
+    main()
diff --git a/deep_code/cli/publish.py b/deep_code/cli/publish.py
new file mode 100644
index 0000000..48b1e63
--- /dev/null
+++ b/deep_code/cli/publish.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
+
+import click
+
+from deep_code.tools.publish import DatasetPublisher
+
+
+@click.command(name="publish-dataset")
+@click.argument(
+    "dataset_config",
+    type=click.Path(exists=True)
+)
+def publish_dataset(dataset_config):
+    """Request publishing a dataset to the EarthCODE open-science catalog.
+    """
+    publisher = DatasetPublisher()
+    publisher.publish_dataset(dataset_config_path=dataset_config)
diff --git a/deep_code/constants.py b/deep_code/constants.py
new file mode 100644
index 0000000..68982bc
--- /dev/null
+++ b/deep_code/constants.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2024 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
+
+OSC_SCHEMA_URI = "https://stac-extensions.github.io/osc/v1.0.0-rc.3/schema.json"
+CF_SCHEMA_URI = "https://stac-extensions.github.io/cf/v0.2.0/schema.json"
+OSC_REPO_OWNER = "ESA-EarthCODE"
+OSC_REPO_NAME = "open-science-catalog-metadata-testing"
+OSC_BRANCH_NAME = "add-new-collection"
diff --git a/deep_code/tests/tools/__init__.py b/deep_code/tests/tools/__init__.py
new file mode 100644
index 0000000..073ddd0
--- /dev/null
+++ b/deep_code/tests/tools/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
diff --git a/deep_code/tests/tools/test_publish.py b/deep_code/tests/tools/test_publish.py
new file mode 100644
index 0000000..47c9961
--- /dev/null
+++ b/deep_code/tests/tools/test_publish.py
@@ -0,0 +1,122 @@
+import pytest
+from unittest.mock import patch, MagicMock, mock_open
+
+from deep_code.tools.publish import DatasetPublisher
+
+
+class TestDatasetPublisher:
+    @patch("deep_code.tools.publish.fsspec.open")
+    def test_init_missing_credentials(self, mock_fsspec_open):
+        mock_fsspec_open.return_value.__enter__.return_value = mock_open(
+            read_data="{}"
+        )()
+
+        with pytest.raises(
+            ValueError, match="GitHub credentials are missing in the `.gitaccess` file."
+ ): + DatasetPublisher() + + @patch("deep_code.tools.publish.fsspec.open") + def test_publish_dataset_missing_ids(self, mock_fsspec_open): + git_yaml_content = """ + github-username: test-user + github-token: test-token + """ + dataset_yaml_content = """ + collection-id: test-collection + """ + mock_fsspec_open.side_effect = [ + mock_open(read_data=git_yaml_content)(), + mock_open(read_data=dataset_yaml_content)(), + ] + + publisher = DatasetPublisher() + + with pytest.raises( + ValueError, + match="Dataset ID or Collection ID is missing in the " + "dataset-config.yaml file.", + ): + publisher.publish_dataset("/path/to/dataset-config.yaml") + + @patch("deep_code.utils.github_automation.os.chdir") + @patch("deep_code.utils.github_automation.subprocess.run") + @patch("deep_code.utils.github_automation.os.path.expanduser", return_value="/tmp") + @patch("requests.post") + @patch("deep_code.utils.github_automation.GitHubAutomation") + @patch("deep_code.tools.publish.fsspec.open") + def test_publish_dataset_success( + self, + mock_fsspec_open, + mock_github_automation, + mock_requests_post, + mock_expanduser, + mock_subprocess_run, + mock_chdir, + ): + + # Mock the YAML reads + git_yaml_content = """ + github-username: test-user + github-token: test-token + """ + dataset_yaml_content = """ + dataset-id: test-dataset + collection-id: test-collection + documentation-link: http://example.com/doc + access-link: http://example.com/access + dataset-status: ongoing + dataset-region: Global + dataset-theme: ["climate"] + cf-parameter: [] + """ + mock_fsspec_open.side_effect = [ + mock_open(read_data=git_yaml_content)(), + mock_open(read_data=dataset_yaml_content)(), + ] + + # Mock GitHubAutomation methods + mock_git = mock_github_automation.return_value + mock_git.fork_repository.return_value = None + mock_git.clone_repository.return_value = None + mock_git.create_branch.return_value = None + mock_git.add_file.return_value = None + mock_git.commit_and_push.return_value = None + mock_git.create_pull_request.return_value = "http://example.com/pr" + mock_git.clean_up.return_value = None + + # Mock subprocess.run & os.chdir + mock_subprocess_run.return_value = None + mock_chdir.return_value = None + + # Mock STAC generator + mock_collection = MagicMock() + mock_collection.to_dict.return_value = { + "type": "Collection", + "id": "test-collection", + "description": "A test STAC collection", + "extent": { + "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, + "temporal": {"interval": [["2023-01-01T00:00:00Z", None]]}, + }, + "links": [], + "stac_version": "1.0.0", + } + with patch("deep_code.tools.publish.OSCProductSTACGenerator") as mock_generator: + mock_generator.return_value.build_stac_collection.return_value = ( + mock_collection + ) + + # Instantiate & publish + publisher = DatasetPublisher() + publisher.publish_dataset("/fake/path/to/dataset-config.yaml") + + # 6Assert that we called git clone with /tmp/temp_repo + # Because expanduser("~") is now patched to /tmp, the actual path is /tmp/temp_repo + auth_url = "https://test-user:test-token@github.com/test-user/open-science-catalog-metadata-testing.git" + mock_subprocess_run.assert_any_call( + ["git", "clone", auth_url, "/tmp/temp_repo"], check=True + ) + + # Also confirm we changed directories to /tmp/temp_repo + mock_chdir.assert_any_call("/tmp/temp_repo") diff --git a/deep_code/tests/utils/__init__.py b/deep_code/tests/utils/__init__.py new file mode 100644 index 0000000..073ddd0 --- /dev/null +++ b/deep_code/tests/utils/__init__.py @@ -0,0 +1,3 @@ 
diff --git a/deep_code/tests/utils/__init__.py b/deep_code/tests/utils/__init__.py
new file mode 100644
index 0000000..073ddd0
--- /dev/null
+++ b/deep_code/tests/utils/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
diff --git a/deep_code/tests/utils/test_dataset_stac_generator.py b/deep_code/tests/utils/test_dataset_stac_generator.py
new file mode 100644
index 0000000..12321b2
--- /dev/null
+++ b/deep_code/tests/utils/test_dataset_stac_generator.py
@@ -0,0 +1,205 @@
+import os
+from datetime import datetime
+
+import numpy as np
+from pystac import Collection
+import unittest
+from unittest.mock import patch, MagicMock
+from xarray import Dataset
+
+from deep_code.utils.dataset_stac_generator import OSCProductSTACGenerator
+
+
+class TestOSCProductSTACGenerator(unittest.TestCase):
+    @patch("deep_code.utils.dataset_stac_generator.new_data_store")
+    def setUp(self, mock_data_store):
+        """Set up a mock dataset and generator."""
+        self.mock_dataset = Dataset(
+            coords={
+                "lon": ("lon", np.linspace(-180, 180, 10)),
+                "lat": ("lat", np.linspace(-90, 90, 5)),
+                "time": (
+                    "time",
+                    [
+                        np.datetime64(datetime(2023, 1, 1), "ns"),
+                        np.datetime64(datetime(2023, 1, 2), "ns"),
+                    ],
+                ),
+            },
+            attrs={"description": "Mock dataset for testing.", "title": "Mock Dataset"},
+            data_vars={
+                "var1": (("time", "lat", "lon"), np.random.rand(2, 5, 10)),
+                "var2": (("time", "lat", "lon"), np.random.rand(2, 5, 10)),
+            },
+        )
+        mock_store = MagicMock()
+        mock_store.open_data.return_value = self.mock_dataset
+        mock_data_store.return_value = mock_store
+
+        self.generator = OSCProductSTACGenerator(
+            dataset_id="mock-dataset-id",
+            collection_id="mock-collection-id",
+            access_link="s3://mock-bucket/mock-dataset",
+            documentation_link="https://example.com/docs",
+            osc_status="ongoing",
+            osc_region="Global",
+            osc_themes=["climate", "environment"],
+        )
+
+    def test_open_dataset(self):
+        """Test if the dataset is opened correctly."""
+        self.assertIsInstance(self.generator.dataset, Dataset)
+        self.assertIn("lon", self.generator.dataset.coords)
+        self.assertIn("lat", self.generator.dataset.coords)
+        self.assertIn("time", self.generator.dataset.coords)
+
+    def test_get_spatial_extent(self):
+        """Test spatial extent extraction."""
+        extent = self.generator._get_spatial_extent()
+        self.assertEqual(extent.bboxes[0], [-180.0, -90.0, 180.0, 90.0])
+
+    def test_get_temporal_extent(self):
+        """Test temporal extent extraction."""
+        extent = self.generator._get_temporal_extent()
+        expected_intervals = [datetime(2023, 1, 1, 0, 0), datetime(2023, 1, 2, 0, 0)]
+        self.assertEqual(extent.intervals[0], expected_intervals)
+
+    def test_get_variables(self):
+        """Test variable extraction."""
+        variables = self.generator._get_variables()
+        self.assertEqual(variables, ["var1", "var2"])
+
+    def test_get_general_metadata(self):
+        """Test general metadata extraction."""
+        metadata = self.generator._get_general_metadata()
+        self.assertEqual(metadata["description"], "Mock dataset for testing.")
+
+    @patch("pystac.Collection.add_link")
+    @patch("pystac.Collection.set_self_href")
+    def test_build_stac_collection(self, mock_set_self_href, mock_add_link):
+        """Test STAC collection creation."""
+        collection = self.generator.build_stac_collection()
+        self.assertIsInstance(collection, Collection)
+        self.assertEqual(collection.id, "mock-collection-id")
+        self.assertEqual(collection.description, "Mock dataset for testing.")
+        self.assertEqual(
+            collection.extent.spatial.bboxes[0], [-180.0, -90.0, 180.0, 90.0]
+        )
+        self.assertEqual(
+            collection.extent.temporal.intervals[0],
+            [datetime(2023, 1, 1, 0, 0), datetime(2023, 1, 2, 0, 0)],
+        )
+        mock_set_self_href.assert_called_once()
+        mock_add_link.assert_called()
+
+    def test_invalid_spatial_extent(self):
+        """Test spatial extent extraction with missing coordinates."""
+        self.generator.dataset = Dataset(coords={"x": [], "y": []})
+        with self.assertRaises(ValueError):
+            self.generator._get_spatial_extent()
+
+    def test_invalid_temporal_extent(self):
+        """Test temporal extent extraction with missing time."""
+        self.generator.dataset = Dataset(coords={})
+        with self.assertRaises(ValueError):
+            self.generator._get_temporal_extent()
+
+
+class TestOpenDataset(unittest.TestCase):
+    @patch("deep_code.utils.dataset_stac_generator.new_data_store")
+    @patch("deep_code.utils.dataset_stac_generator.logging.getLogger")
+    def test_open_dataset_success_public_store(self, mock_logger, mock_new_data_store):
+        """Test dataset opening with the public store configuration."""
+        # Create a mock store and mock its `open_data` method
+        mock_store = MagicMock()
+        mock_new_data_store.return_value = mock_store
+        mock_store.open_data.return_value = "mock_dataset"
+
+        # Instantiate the generator (this will implicitly call _open_dataset)
+        generator = OSCProductSTACGenerator("mock-dataset-id", "mock-collection-id")
+
+        # Validate that the dataset is assigned correctly
+        self.assertEqual(generator.dataset, "mock_dataset")
+
+        # Validate that `new_data_store` was called once with the correct parameters
+        mock_new_data_store.assert_called_once_with(
+            "s3", root="deep-esdl-public", storage_options={"anon": True}
+        )
+
+        # Ensure `open_data` was called once on the returned store
+        mock_store.open_data.assert_called_once_with("mock-dataset-id")
+
+        # Validate logging behavior
+        mock_logger().info.assert_any_call(
+            "Attempting to open dataset with configuration: Public store"
+        )
+        mock_logger().info.assert_any_call(
+            "Successfully opened dataset with configuration: Public store"
+        )
+
+    @patch("deep_code.utils.dataset_stac_generator.new_data_store")
+    @patch("deep_code.utils.dataset_stac_generator.logging.getLogger")
+    def test_open_dataset_success_authenticated_store(
+        self, mock_logger, mock_new_data_store
+    ):
+        """Test dataset opening with the authenticated store configuration."""
+        # Simulate public store failure
+        mock_store = MagicMock()
+        mock_new_data_store.side_effect = [
+            # First call (public store) raises an exception
+            Exception("Public store failure"),
+            # Second call (authenticated store) returns a mock store
+            mock_store,
+        ]
+        mock_store.open_data.return_value = "mock_dataset"
+
+        os.environ["S3_USER_STORAGE_BUCKET"] = "mock-bucket"
+        os.environ["S3_USER_STORAGE_KEY"] = "mock-key"
+        os.environ["S3_USER_STORAGE_SECRET"] = "mock-secret"
+
+        generator = OSCProductSTACGenerator("mock-dataset-id", "mock-collection-id")
+
+        # Validate that the dataset was successfully opened with the authenticated store
+        self.assertEqual(generator.dataset, "mock_dataset")
+        self.assertEqual(mock_new_data_store.call_count, 2)
+
+        # Validate calls to `new_data_store`
+        mock_new_data_store.assert_any_call(
+            "s3", root="deep-esdl-public", storage_options={"anon": True}
+        )
+        mock_new_data_store.assert_any_call(
+            "s3",
+            root="mock-bucket",
+            storage_options={"anon": False, "key": "mock-key", "secret": "mock-secret"},
+        )
+
+        # Validate logging calls
+        mock_logger().info.assert_any_call(
+            "Attempting to open dataset with configuration: Public store"
+        )
+        mock_logger().info.assert_any_call(
+            "Attempting to open dataset with configuration: Authenticated store"
+        )
+        mock_logger().info.assert_any_call(
+            "Successfully opened dataset with configuration: Authenticated store"
+        )
+
+    @patch("deep_code.utils.dataset_stac_generator.new_data_store")
+    @patch("deep_code.utils.dataset_stac_generator.logging.getLogger")
+    def test_open_dataset_failure(self, mock_logger, mock_new_data_store):
+        """Test dataset opening failure with all configurations."""
+        # Simulate all store failures
+        mock_new_data_store.side_effect = Exception("Store failure")
+        os.environ["S3_USER_STORAGE_BUCKET"] = "mock-bucket"
+        os.environ["S3_USER_STORAGE_KEY"] = "mock-key"
+        os.environ["S3_USER_STORAGE_SECRET"] = "mock-secret"
+
+        with self.assertRaises(ValueError) as context:
+            OSCProductSTACGenerator("mock-dataset-id", "mock-collection-id")
+
+        self.assertIn(
+            "Failed to open Zarr dataset with ID mock-dataset-id",
+            str(context.exception),
+        )
+        self.assertIn("Public store, Authenticated store", str(context.exception))
+        self.assertEqual(mock_new_data_store.call_count, 2)
diff --git a/deep_code/tests/utils/test_github_automation.py b/deep_code/tests/utils/test_github_automation.py
new file mode 100644
index 0000000..58acc09
--- /dev/null
+++ b/deep_code/tests/utils/test_github_automation.py
@@ -0,0 +1,119 @@
+import unittest
+from unittest.mock import patch, MagicMock
+from pathlib import Path
+import json
+from deep_code.utils.github_automation import GitHubAutomation
+
+
+class TestGitHubAutomation(unittest.TestCase):
+    def setUp(self):
+        self.github = GitHubAutomation(
+            username="test-user",
+            token="test-token",
+            repo_owner="test-owner",
+            repo_name="test-repo",
+        )
+
+    @patch("requests.post")
+    def test_fork_repository(self, mock_post):
+        """Test the fork_repository method."""
+        mock_response = MagicMock()
+        mock_response.raise_for_status.return_value = None
+        mock_post.return_value = mock_response
+
+        self.github.fork_repository()
+
+        mock_post.assert_called_once_with(
+            "https://api.github.com/repos/test-owner/test-repo/forks",
+            headers={"Authorization": "token test-token"},
+        )
+
+    @patch("subprocess.run")
+    @patch("os.chdir")
+    def test_clone_repository(self, mock_chdir, mock_run):
+        """Test the clone_repository method."""
+        self.github.clone_repository()
+
+        mock_run.assert_called_once_with(
+            ["git", "clone", self.github.fork_repo_url, self.github.local_clone_dir],
+            check=True,
+        )
+        mock_chdir.assert_called_once_with(self.github.local_clone_dir)
+
+    @patch("subprocess.run")
+    def test_create_branch(self, mock_run):
+        """Test the create_branch method."""
+        branch_name = "test-branch"
+        self.github.create_branch(branch_name)
+
+        mock_run.assert_called_once_with(
+            ["git", "checkout", "-b", branch_name], check=True
+        )
+
+    @patch("subprocess.run")
+    @patch("builtins.open", new_callable=unittest.mock.mock_open)
+    @patch("pathlib.Path.mkdir")
+    def test_add_file(self, mock_mkdir, mock_open, mock_run):
+        """Test the add_file method."""
+        file_path = "test-dir/test-file.json"
+        content = {"key": "value"}
+
+        self.github.add_file(file_path, content)
+
+        mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
+        mock_open.assert_called_once_with(
+            Path(self.github.local_clone_dir) / file_path, "w"
+        )
+        mock_open().write.assert_called_once_with(json.dumps(content, indent=2))
+        mock_run.assert_called_once_with(
+            ["git", "add", str(Path(self.github.local_clone_dir) / file_path)],
+            check=True,
+        )
+
+    @patch("subprocess.run")
+    def test_commit_and_push(self, mock_run):
+        """Test the commit_and_push method."""
+        branch_name = "test-branch"
message" + + self.github.commit_and_push(branch_name, commit_message) + + mock_run.assert_any_call(["git", "commit", "-m", commit_message], check=True) + mock_run.assert_any_call( + ["git", "push", "-u", "origin", branch_name], check=True + ) + + @patch("requests.post") + def test_create_pull_request(self, mock_post): + """Test the create_pull_request method.""" + branch_name = "test-branch" + pr_title = "Test PR" + pr_body = "This is a test PR" + base_branch = "main" + + mock_response = MagicMock() + mock_response.json.return_value = {"html_url": "https://github.com/test-pr"} + mock_response.raise_for_status.return_value = None + mock_post.return_value = mock_response + + self.github.create_pull_request(branch_name, pr_title, pr_body, base_branch) + + mock_post.assert_called_once_with( + "https://api.github.com/repos/test-owner/test-repo/pulls", + headers={"Authorization": "token test-token"}, + json={ + "title": pr_title, + "head": f"test-user:{branch_name}", + "base": base_branch, + "body": pr_body, + }, + ) + + @patch("subprocess.run") + @patch("os.chdir") + def test_clean_up(self, mock_chdir, mock_run): + """Test the clean_up method.""" + self.github.clean_up() + + mock_chdir.assert_called_once_with("..") + mock_run.assert_called_once_with(["rm", "-rf", self.github.local_clone_dir]) diff --git a/deep_code/tests/utils/test_osc_extension.py b/deep_code/tests/utils/test_osc_extension.py new file mode 100644 index 0000000..66300cc --- /dev/null +++ b/deep_code/tests/utils/test_osc_extension.py @@ -0,0 +1,115 @@ +import unittest +from pystac import Collection, Extent, SpatialExtent, TemporalExtent +from deep_code.utils.osc_extension import OscExtension + + +class TestOscExtension(unittest.TestCase): + def setUp(self): + """Set up a test Collection object and attach the OscExtension.""" + self.collection = Collection( + id="test-collection", + description="Test collection for unit tests", + extent=Extent( + spatial=SpatialExtent([[-180, -90, 180, 90]]), + temporal=TemporalExtent( + [["2022-01-01T00:00:00Z", "2023-01-01T00:00:00Z"]] + ), + ), + stac_extensions=[], + ) + OscExtension.add_to(self.collection) + + def test_osc_status(self): + """Test the osc:status property.""" + extension = OscExtension.ext(self.collection) + extension.osc_status = "ongoing" + self.assertEqual(extension.osc_status, "ongoing") + + def test_osc_region(self): + """Test the osc:region property.""" + extension = OscExtension.ext(self.collection) + extension.osc_region = "Mediterranean region" + self.assertEqual(extension.osc_region, "Mediterranean region") + + def test_osc_themes(self): + """Test the osc:themes property.""" + extension = OscExtension.ext(self.collection) + extension.osc_themes = ["land", "ocean"] + self.assertEqual(extension.osc_themes, ["land", "ocean"]) + + def test_osc_missions(self): + """Test the osc:missions property.""" + extension = OscExtension.ext(self.collection) + extension.osc_missions = ["mission1", "mission2"] + self.assertEqual(extension.osc_missions, ["mission1", "mission2"]) + + def test_keywords(self): + """Test the keywords property.""" + extension = OscExtension.ext(self.collection) + extension.keywords = ["Hydrology", "Remote Sensing"] + self.assertEqual(extension.keywords, ["Hydrology", "Remote Sensing"]) + + def test_cf_parameters(self): + """Test the cf:parameter property.""" + extension = OscExtension.ext(self.collection) + extension.cf_parameter = [{"name": "hydrology-4D"}] + self.assertEqual(extension.cf_parameter, [{"name": "hydrology-4D"}]) + + def 
+    def test_created_updated(self):
+        """Test the created and updated properties."""
+        extension = OscExtension.ext(self.collection)
+        extension.created = "2023-12-21T11:50:17Z"
+        extension.updated = "2023-12-21T11:50:17Z"
+        self.assertEqual(extension.created, "2023-12-21T11:50:17Z")
+        self.assertEqual(extension.updated, "2023-12-21T11:50:17Z")
+
+    def test_set_extent(self):
+        """Test setting spatial and temporal extent."""
+        extension = OscExtension.ext(self.collection)
+        spatial = [[-5.7, 28.3, 37.7, 48.1]]
+        temporal = [["2014-12-31T12:00:00Z", "2022-10-06T12:00:00Z"]]
+        extension.set_extent(spatial, temporal)
+
+        self.assertEqual(self.collection.extent.spatial.bboxes, spatial)
+        self.assertEqual(self.collection.extent.temporal.intervals, temporal)
+
+    def test_validation_success(self):
+        """Test validation with all required fields."""
+        extension = OscExtension.ext(self.collection)
+        extension.osc_type = "product"
+        extension.osc_project = "test-project"
+        extension.osc_status = "ongoing"
+        extension.validate_extension()  # Should not raise an exception
+
+    def test_add_osc_extension(self):
+        osc_ext = OscExtension.add_to(self.collection)
+        self.assertEqual(OscExtension.get_schema_uri(), self.collection.stac_extensions)
+        self.assertIsInstance(osc_ext, OscExtension)
+
+    def test_has_extension(self):
+        self.collection.stac_extensions = []
+        self.assertFalse(OscExtension.has_extension(self.collection))
+        OscExtension.add_to(self.collection)
+        self.assertTrue(OscExtension.has_extension(self.collection))
+
+    def test_set_and_get_properties(self):
+        osc_ext = OscExtension.add_to(self.collection)
+        osc_ext.osc_type = "example-type"
+        osc_ext.osc_project = "example-project"
+        osc_ext.osc_product = "example-product"
+        osc_ext.osc_theme = ["example-theme"]
+        osc_ext.osc_variables = ["var1", "var2", "var3"]
+
+        self.assertEqual(osc_ext.osc_type, "example-type")
+        self.assertEqual(osc_ext.osc_project, "example-project")
+        self.assertEqual(osc_ext.osc_product, "example-product")
+        self.assertEqual(osc_ext.osc_theme, ["example-theme"])
+        self.assertListEqual(osc_ext.osc_variables, ["var1", "var2", "var3"])
+
+    def test_validation_missing_fields(self):
+        """Test validation with missing required fields."""
+        extension = OscExtension.ext(self.collection)
+        with self.assertRaises(ValueError) as context:
+            extension.validate_extension()
+        self.assertIn("Missing required fields", str(context.exception))
+        self.assertIn("osc:type", str(context.exception))
diff --git a/deep_code/tools/__init__.py b/deep_code/tools/__init__.py
new file mode 100644
index 0000000..073ddd0
--- /dev/null
+++ b/deep_code/tools/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
diff --git a/deep_code/tools/check.py b/deep_code/tools/check.py
new file mode 100644
index 0000000..3b54c65
--- /dev/null
+++ b/deep_code/tools/check.py
@@ -0,0 +1,4 @@
+"""
+Verify the readiness of a dataset or an existing workflow repository for experiment
+publication by identifying any issues or missing components.
+"""
diff --git a/deep_code/tools/new.py b/deep_code/tools/new.py
new file mode 100644
index 0000000..3d1ed1e
--- /dev/null
+++ b/deep_code/tools/new.py
@@ -0,0 +1,5 @@
+"""Logic for initializing repositories.
+Initialize a GitHub repository with the proposed configuration files, an initial
+workflow notebook template (e.g. workflow.ipynb), a template Python package (code
+and pyproject.toml), a template setup for documentation (e.g., using mkdocs), and
+a setup of the build pipeline."""
diff --git a/deep_code/tools/publish.py b/deep_code/tools/publish.py
new file mode 100644
index 0000000..26b49f3
--- /dev/null
+++ b/deep_code/tools/publish.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
+
+import fsspec
+import logging
+import yaml
+
+from deep_code.constants import OSC_REPO_OWNER, OSC_REPO_NAME, OSC_BRANCH_NAME
+from deep_code.utils.dataset_stac_generator import OSCProductSTACGenerator
+from deep_code.utils.github_automation import GitHubAutomation
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+class DatasetPublisher:
+    """
+    Publishes products to the OSC GitHub repository.
+
+    Credentials must be provided via a hidden file named `.gitaccess`, located in
+    the root of the repository. This file is expected to contain YAML of the form:
+
+        github-username: "YOUR_GITHUB_USERNAME"
+        github-token: "YOUR_GITHUB_PERSONAL_ACCESS_TOKEN"
+    """
+
+    def __init__(self):
+        with fsspec.open(".gitaccess", "r") as file:
+            git_config = yaml.safe_load(file) or {}
+
+        self.github_username = git_config.get("github-username")
+        self.github_token = git_config.get("github-token")
+
+        if not self.github_username or not self.github_token:
+            raise ValueError("GitHub credentials are missing in the `.gitaccess` file.")
+
+        self.github_automation = GitHubAutomation(
+            self.github_username, self.github_token, OSC_REPO_OWNER, OSC_REPO_NAME
+        )
+
+    def publish_dataset(self, dataset_config_path: str):
+        """Publish a product collection to the specified GitHub repository.
+
+        Args:
+            dataset_config_path: Path to the YAML file containing the dataset config.
+        """
+        with fsspec.open(dataset_config_path, "r") as file:
+            dataset_config = yaml.safe_load(file)
+
+        dataset_id = dataset_config.get("dataset-id")
+        collection_id = dataset_config.get("collection-id")
+        documentation_link = dataset_config.get("documentation-link")
+        access_link = dataset_config.get("access-link")
+        dataset_status = dataset_config.get("dataset-status")
+        osc_region = dataset_config.get("dataset-region")
+        dataset_theme = dataset_config.get("dataset-theme")
+        cf_params = dataset_config.get("cf-parameter")
+
+        if not dataset_id or not collection_id:
+            raise ValueError(
+                "Dataset ID or Collection ID is missing in the dataset-config.yaml "
+                "file."
+            )
+
+        try:
+            logger.info("Generating STAC collection...")
+            generator = OSCProductSTACGenerator(
+                dataset_id=dataset_id,
+                collection_id=collection_id,
+                documentation_link=documentation_link,
+                access_link=access_link,
+                osc_status=dataset_status,
+                osc_region=osc_region,
+                osc_themes=dataset_theme,
+                cf_params=cf_params,
+            )
+            collection = generator.build_stac_collection()
+
+            file_path = f"products/{collection_id}/collection.json"
+            logger.info("Automating GitHub tasks...")
+            self.github_automation.fork_repository()
+            self.github_automation.clone_repository()
+            new_branch_name = OSC_BRANCH_NAME + "-" + collection_id
+            self.github_automation.create_branch(new_branch_name)
+            self.github_automation.add_file(file_path, collection.to_dict())
+            self.github_automation.commit_and_push(
+                new_branch_name, f"Add new collection: {collection_id}"
+            )
+            pr_url = self.github_automation.create_pull_request(
+                new_branch_name,
+                "Add new collection",
+                "This PR adds a new collection to the repository.",
+            )
+
+            logger.info(f"Pull request created: {pr_url}")
+
+        finally:
+            self.github_automation.clean_up()
diff --git a/deep_code/api/register.py b/deep_code/tools/register.py
similarity index 100%
rename from deep_code/api/register.py
rename to deep_code/tools/register.py
diff --git a/deep_code/tools/setup_ci.py b/deep_code/tools/setup_ci.py
new file mode 100644
index 0000000..889d59b
--- /dev/null
+++ b/deep_code/tools/setup_ci.py
@@ -0,0 +1 @@
+"""Logic for setting up build pipelines."""
diff --git a/deep_code/tools/test.py b/deep_code/tools/test.py
new file mode 100644
index 0000000..5bdf092
--- /dev/null
+++ b/deep_code/tools/test.py
@@ -0,0 +1,2 @@
+"""Execute the application package of a published experiment on a subset of input data
+to verify that reproducibility is achieved."""
diff --git a/deep_code/utils/__init__.py b/deep_code/utils/__init__.py
new file mode 100644
index 0000000..073ddd0
--- /dev/null
+++ b/deep_code/utils/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
diff --git a/deep_code/utils/dataset_stac_generator.py b/deep_code/utils/dataset_stac_generator.py
new file mode 100644
index 0000000..21f4cf8
--- /dev/null
+++ b/deep_code/utils/dataset_stac_generator.py
@@ -0,0 +1,300 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
+
+import os
+import logging
+from datetime import datetime, timezone
+
+import pandas as pd
+from pystac import Collection, Extent, Link, SpatialExtent, TemporalExtent
+from xcube.core.store import new_data_store
+
+from deep_code.utils.osc_extension import OscExtension
+
+
+class OSCProductSTACGenerator:
+    """Generates OSC STAC Collections for a product from Zarr datasets.
+
+    Args:
+        dataset_id: ID of the Zarr dataset.
+        collection_id: Unique identifier for the STAC collection.
+        access_link: Public access link to the dataset.
+        documentation_link: Link to dataset documentation.
+        osc_status: Status of the dataset (e.g., "ongoing").
+        osc_region: Geographical region associated with the dataset.
+        osc_themes: List of themes related to the dataset (e.g., ["climate"]).
+        osc_missions: List of satellite missions associated with the dataset.
+        cf_params: CF metadata parameters for the dataset.
+ """ + + def __init__( + self, + dataset_id: str, + collection_id: str, + access_link: str | None = None, + documentation_link: str | None = None, + osc_status: str = "ongoing", + osc_region: str = "Global", + osc_themes: list[str] | None = None, + osc_missions: list[str] | None = None, + cf_params: list[dict[str]] | None = None, + ): + self.dataset_id = dataset_id + self.collection_id = collection_id + self.access_link = access_link or f"s3://deep-esdl-public/{dataset_id}" + self.documentation_link = documentation_link + self.osc_status = osc_status + self.osc_region = osc_region + self.osc_themes = osc_themes or [] + self.osc_missions = osc_missions or [] + self.cf_params = cf_params or {} + self.logger = logging.getLogger(__name__) + self.dataset = self._open_dataset() + + def _open_dataset(self): + """Open the dataset using a S3 store as a xarray Dataset.""" + + store_configs = [ + { + "description": "Public store", + "params": { + "storage_type": "s3", + "root": "deep-esdl-public", + "storage_options": {"anon": True}, + }, + }, + { + "description": "Authenticated store", + "params": { + "storage_type": "s3", + "root": os.environ.get("S3_USER_STORAGE_BUCKET"), + "storage_options": { + "anon": False, + "key": os.environ.get("S3_USER_STORAGE_KEY"), + "secret": os.environ.get("S3_USER_STORAGE_SECRET"), + }, + }, + }, + ] + + # Iterate through configurations and attempt to open the dataset + last_exception = None + tried_configurations = [] + for config in store_configs: + tried_configurations.append(config["description"]) + try: + self.logger.info( + f"Attempting to open dataset with configuration: " + f"{config['description']}" + ) + store = new_data_store( + config["params"]["storage_type"], + root=config["params"]["root"], + storage_options=config["params"]["storage_options"], + ) + dataset = store.open_data(self.dataset_id) + self.logger.info( + f"Successfully opened dataset with configuration: " + f"{config['description']}" + ) + return dataset + except Exception as e: + self.logger.error( + f"Failed to open dataset with configuration: " + f"{config['description']}. Error: {e}" + ) + last_exception = e + + raise ValueError( + f"Failed to open Zarr dataset with ID {self.dataset_id}. " + f"Tried configurations: {', '.join(tried_configurations)}. " + f"Last error: {last_exception}" + ) + + def _get_spatial_extent(self) -> SpatialExtent: + """Extract spatial extent from the dataset.""" + if {"lon", "lat"}.issubset(self.dataset.coords): + # For regular gridding + lon_min, lon_max = ( + float(self.dataset.lon.min()), + float(self.dataset.lon.max()), + ) + lat_min, lat_max = ( + float(self.dataset.lat.min()), + float(self.dataset.lat.max()), + ) + return SpatialExtent([[lon_min, lat_min, lon_max, lat_max]]) + elif {"longitude", "latitude"}.issubset(self.dataset.coords): + # For regular gridding with 'longitude' and 'latitude' + lon_min, lon_max = ( + float(self.dataset.longitude.min()), + float(self.dataset.longitude.max()), + ) + lat_min, lat_max = ( + float(self.dataset.latitude.min()), + float(self.dataset.latitude.max()), + ) + return SpatialExtent([[lon_min, lat_min, lon_max, lat_max]]) + elif {"x", "y"}.issubset(self.dataset.coords): + # For irregular gridding + x_min, x_max = (float(self.dataset.x.min()), float(self.dataset.x.max())) + y_min, y_max = (float(self.dataset.y.min()), float(self.dataset.y.max())) + return SpatialExtent([[x_min, y_min, x_max, y_max]]) + else: + raise ValueError( + "Dataset does not have recognized spatial coordinates " + "('lon', 'lat' or 'x', 'y')." 
+            )
+
+    def _get_temporal_extent(self) -> TemporalExtent:
+        """Extract temporal extent from the dataset."""
+        if "time" in self.dataset.coords:
+            try:
+                # Convert the time bounds to datetime objects
+                time_min = pd.to_datetime(
+                    self.dataset.time.min().values
+                ).to_pydatetime()
+                time_max = pd.to_datetime(
+                    self.dataset.time.max().values
+                ).to_pydatetime()
+                return TemporalExtent([[time_min, time_max]])
+            except Exception as e:
+                raise ValueError(f"Failed to parse temporal extent: {e}")
+        else:
+            raise ValueError("Dataset does not have a 'time' coordinate.")
+
+    @staticmethod
+    def _normalize_name(name: str | None) -> str | None:
+        return name.replace(" ", "-").lower() if name else None
+
+    def _get_variables(self) -> list[str]:
+        """Extracts variable names or descriptions from the dataset.
+
+        Variables are prioritized based on their `long_name` or `standard_name`
+        attributes. If neither is available, the variable's key from
+        `dataset.data_vars.keys()` is used.
+
+        Returns:
+            A list of variable names or descriptions.
+        """
+        variables = []
+        for var_name, variable in self.dataset.data_vars.items():
+            long_name = self._normalize_name(variable.attrs.get("long_name"))
+            standard_name = self._normalize_name(variable.attrs.get("standard_name"))
+            if not long_name and not standard_name:
+                self.logger.error(
+                    f"Metadata missing for variable '{var_name}': 'long_name' and "
+                    f"'standard_name' attributes are not available."
+                )
+            # Prioritize 'long_name', fall back to 'standard_name', then the variable key
+            variables.append(long_name or standard_name or var_name)
+        return variables
+
+    def _get_general_metadata(self) -> dict:
+        return {
+            "description": self.dataset.attrs.get(
+                "description", "No description available."
+            )
+        }
+
+    def _get_variable_metadata(self, var_name, var_data) -> dict:
+        """Extract metadata from a single variable's attributes.
+
+        Args:
+            var_name: The raw variable name in the dataset.
+            var_data: An xarray DataArray containing variable data and attrs.
+
+        Returns:
+            A dict with 'id', 'title', and 'description'.
+        """
+        long_name = var_data.attrs.get("long_name")
+        standard_name = var_data.attrs.get("standard_name")
+        title = long_name or standard_name or var_name
+
+        normalized_title = self._normalize_name(title)
+
+        description = var_data.attrs.get("description", "No variable description")
+
+        return {"id": var_name, "title": normalized_title, "description": description}
+
+    def build_stac_collection(self) -> Collection:
+        """Build an OSC STAC Collection for the dataset.
+
+        Returns:
+            A pystac.Collection object.
+ """ + try: + spatial_extent = self._get_spatial_extent() + temporal_extent = self._get_temporal_extent() + variables = self._get_variables() + general_metadata = self._get_general_metadata() + except ValueError as e: + raise ValueError(f"Metadata extraction failed: {e}") + + # Build base STAC Collection + collection = Collection( + id=self.collection_id, + description=general_metadata.get("description", "No description provided."), + extent=Extent(spatial=spatial_extent, temporal=temporal_extent), + ) + + # Add OSC extension metadata + osc_extension = OscExtension.add_to(collection) + # osc_project and osc_type are fixed constant values + osc_extension.osc_project = "deep-earth-system-data-lab" + osc_extension.osc_type = "product" + osc_extension.osc_status = self.osc_status + osc_extension.osc_region = self.osc_region + osc_extension.osc_themes = self.osc_themes + osc_extension.osc_variables = variables + osc_extension.osc_missions = self.osc_missions + if self.cf_params: + osc_extension.cf_parameter = self.cf_params + else: + osc_extension.cf_parameter = [{"name": self.collection_id}] + + # Add creation and update timestamps for the collection + now_iso = datetime.now(timezone.utc).isoformat() + collection.extra_fields["created"] = now_iso + collection.extra_fields["updated"] = now_iso + + # Remove any existing root link and re-add it properly + collection.remove_links("root") + collection.add_link( + Link( + rel="root", + target="../../catalog.json", + media_type="application/json", + title="Open Science Catalog", + ) + ) + collection.add_link(Link(rel="via", target=self.access_link, title="Access")) + if self.documentation_link: + collection.add_link( + Link(rel="via", target=self.documentation_link, title="Documentation") + ) + collection.add_link( + Link( + rel="parent", + target="../catalog.json", + media_type="application/json", + title="Products", + ) + ) + + self_href = ( + "https://esa-earthcode.github.io/" + "open-science-catalog-metadata/products/deepesdl/collection.json" + ) + collection.set_self_href(self_href) + + # Validate OSC extension fields + try: + osc_extension.validate_extension() + except ValueError as e: + raise ValueError(f"OSC Extension validation failed: {e}") + + return collection diff --git a/deep_code/utils/github_automation.py b/deep_code/utils/github_automation.py new file mode 100644 index 0000000..d934d2a --- /dev/null +++ b/deep_code/utils/github_automation.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2025 by Brockmann Consult GmbH +# Permissions are hereby granted under the terms of the MIT License: +# https://opensource.org/licenses/MIT. + +import json +import logging +import os +import requests +import subprocess +from pathlib import Path + + +class GitHubAutomation: + """Automates GitHub operations needed to create a Pull Request. + + Args: + username: GitHub username. + token: Personal access token for GitHub. + repo_owner: Owner of the repository to fork. + repo_name: Name of the repository to fork. 
+ """ + + def __init__(self, username: str, token: str, repo_owner: str, repo_name: str): + self.username = username + self.token = token + self.repo_owner = repo_owner + self.repo_name = repo_name + self.base_repo_url = f"https://github.com/{repo_owner}/{repo_name}.git" + self.fork_repo_url = ( + f"https://{username}:{token}@github.com/{username}/{repo_name}.git" + ) + self.local_clone_dir = os.path.join(os.path.expanduser("~"), "temp_repo") + + def fork_repository(self): + """Fork the repository to the user's GitHub account.""" + logging.info("Forking repository...") + url = f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/forks" + headers = {"Authorization": f"token {self.token}"} + response = requests.post(url, headers=headers) + response.raise_for_status() + logging.info(f"Repository forked to {self.username}/{self.repo_name}") + + def clone_repository(self): + """Clone the forked repository locally.""" + logging.info("Cloning forked repository...") + try: + subprocess.run( + ["git", "clone", self.fork_repo_url, self.local_clone_dir], check=True + ) + os.chdir(self.local_clone_dir) + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Failed to clone repository: {e}") + + @staticmethod + def create_branch(branch_name: str): + """Create a new branch in the local repository.""" + logging.info(f"Creating new branch: {branch_name}...") + try: + subprocess.run(["git", "checkout", "-b", branch_name], check=True) + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Failed Creating branch: '{branch_name}': {e}") + + def add_file(self, file_path: str, content): + """Add a new file to the local repository.""" + logging.info(f"Adding new file: {file_path}...") + full_path = Path(self.local_clone_dir) / file_path + full_path.parent.mkdir(parents=True, exist_ok=True) + with open(full_path, "w") as f: + # Convert content to dictionary if it's a PySTAC object + if hasattr(content, "to_dict"): + content = content.to_dict() + f.write(json.dumps(content, indent=2)) + try: + subprocess.run(["git", "add", str(full_path)], check=True) + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Failed to add file '{file_path}': {e}") + + @staticmethod + def commit_and_push(branch_name: str, commit_message: str): + """Commit changes and push to the forked repository.""" + logging.info("Committing and pushing changes...") + try: + subprocess.run(["git", "commit", "-m", commit_message], check=True) + subprocess.run(["git", "push", "-u", "origin", branch_name], check=True) + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Failed to commit and push: {e}") + + def create_pull_request( + self, branch_name: str, pr_title: str, pr_body: str, base_branch: str = "main" + ): + """Create a pull request from the forked repository to the base repository.""" + logging.info("Creating a pull request...") + url = f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/pulls" + headers = {"Authorization": f"token {self.token}"} + data = { + "title": pr_title, + "head": f"{self.username}:{branch_name}", + "base": base_branch, + "body": pr_body, + } + response = requests.post(url, headers=headers, json=data) + response.raise_for_status() + pr_url = response.json()["html_url"] + logging.info(f"Pull request created: {pr_url}") + + def clean_up(self): + """Clean up the local cloned repository.""" + logging.info("Cleaning up local repository...") + os.chdir("..") + try: + subprocess.run(["rm", "-rf", self.local_clone_dir]) + except 
diff --git a/deep_code/utils/osc_extension.py b/deep_code/utils/osc_extension.py
new file mode 100644
index 0000000..6aa7519
--- /dev/null
+++ b/deep_code/utils/osc_extension.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2025 by Brockmann Consult GmbH
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
+
+from typing import Literal
+
+import pystac
+from pystac import SpatialExtent, TemporalExtent, Extent
+from pystac.extensions.base import PropertiesExtension, ExtensionManagementMixin
+
+from deep_code.constants import OSC_SCHEMA_URI, CF_SCHEMA_URI
+
+
+class OscExtension(
+    PropertiesExtension, ExtensionManagementMixin[pystac.Item | pystac.Collection]
+):
+    """Handles the OSC extension for STAC Items and Collections.
+
+    Args:
+        obj: The STAC Item or Collection to which the OSC extension is applied.
+    """
+
+    name: Literal["osc"] = "osc"
+
+    def __init__(self, obj: pystac.Item | pystac.Collection):
+        if isinstance(obj, pystac.Collection):
+            self.properties = obj.extra_fields
+        else:
+            self.properties = obj.properties
+        self.obj = obj
+
+    @property
+    def osc_type(self) -> str | None:
+        return self._get_property("osc:type", str)
+
+    @osc_type.setter
+    def osc_type(self, v: str) -> None:
+        self._set_property("osc:type", v, pop_if_none=False)
+
+    @property
+    def osc_name(self) -> str | None:
+        return self._get_property("osc:name", str)
+
+    @osc_name.setter
+    def osc_name(self, v: str) -> None:
+        self._set_property("osc:name", v, pop_if_none=False)
+
+    @property
+    def osc_status(self) -> str | None:
+        return self._get_property("osc:status", str)
+
+    @osc_status.setter
+    def osc_status(self, value: str) -> None:
+        self._set_property("osc:status", value, pop_if_none=False)
+
+    @property
+    def osc_project(self) -> str | None:
+        return self._get_property("osc:project", str)
+
+    @osc_project.setter
+    def osc_project(self, v: str) -> None:
+        self._set_property("osc:project", v, pop_if_none=False)
+
+    @property
+    def osc_themes(self) -> list[str] | None:
+        return self._get_property("osc:themes", list)
+
+    @osc_themes.setter
+    def osc_themes(self, value: list[str]) -> None:
+        if not isinstance(value, list) or not all(
+            isinstance(item, str) for item in value
+        ):
+            raise ValueError("osc:themes must be a list of strings")
+        self._set_property("osc:themes", value, pop_if_none=False)
+
+    @property
+    def osc_region(self) -> str | None:
+        return self._get_property("osc:region", str)
+
+    @osc_region.setter
+    def osc_region(self, value: str) -> None:
+        self._set_property("osc:region", value, pop_if_none=False)
+
+    @property
+    def osc_missions(self) -> list[str] | None:
+        return self._get_property("osc:missions", list)
+
+    @osc_missions.setter
+    def osc_missions(self, value: list[str]) -> None:
+        if not isinstance(value, list) or not all(
+            isinstance(item, str) for item in value
+        ):
+            raise ValueError("osc:missions must be a list of strings")
+        self._set_property("osc:missions", value, pop_if_none=False)
+
+    def set_extent(self, spatial: list[list[float]], temporal: list[list[str]]) -> None:
+        self.obj.extent = Extent(SpatialExtent(spatial), TemporalExtent(temporal))
+
+    @property
+    def osc_variables(self) -> list[str] | None:
+        return self._get_property("osc:variables", list)
+
+    @osc_variables.setter
+    def osc_variables(self, v: list[str]) -> None:
+        if not isinstance(v, list) or not all(isinstance(item, str) for item in v):
raise ValueError("osc:variables must be a list of strings") + self._set_property("osc:variables", v, pop_if_none=False) + + @property + def keywords(self) -> list[str] | None: + return self._get_property("keywords", list) + + @keywords.setter + def keywords(self, value: list[str]) -> None: + if not isinstance(value, list) or not all( + isinstance(item, str) for item in value + ): + raise ValueError("keywords must be a list of strings") + self._set_property("keywords", value, pop_if_none=False) + + @property + def cf_parameter(self) -> list[dict] | None: + return self._get_property("cf:parameter", list) + + @cf_parameter.setter + def cf_parameter(self, value: list[dict]) -> None: + if not isinstance(value, list) or not all( + isinstance(item, dict) for item in value + ): + raise ValueError("cf:parameter must be a list of dictionaries") + self._set_property("cf:parameter", value, pop_if_none=False) + + @property + def created(self) -> str | None: + return self._get_property("created", str) + + @created.setter + def created(self, value: str) -> None: + self._set_property("created", value, pop_if_none=False) + + @property + def updated(self) -> str | None: + return self._get_property("updated", str) + + @updated.setter + def updated(self, value: str) -> None: + self._set_property("updated", value, pop_if_none=False) + + @classmethod + def get_schema_uri(cls) -> list[str]: + return [OSC_SCHEMA_URI, CF_SCHEMA_URI] + + @classmethod + def ext( + cls, obj: pystac.Item | pystac.Collection, add_if_missing: bool = False + ) -> "OscExtension": + """Returns the OscExtension instance for the given object, adding the extension + if missing.""" + if cls.has_extension(obj): + return OscExtension(obj) + elif add_if_missing: + return cls.add_to(obj) + else: + raise ValueError( + "OSC extension is not present and add_if_missing is False." 
+            )
+
+    @classmethod
+    def has_extension(cls, obj: pystac.Item | pystac.Collection) -> bool:
+        """Checks if all required extensions are present."""
+        schema_uris = cls.get_schema_uri()
+        if isinstance(schema_uris, list):
+            return all(uri in obj.stac_extensions for uri in schema_uris)
+        elif isinstance(schema_uris, str):
+            return schema_uris in obj.stac_extensions
+
+    @classmethod
+    def add_to(cls, obj: pystac.Item | pystac.Collection) -> "OscExtension":
+        """Adds the OSC and CF extensions to the object's extensions."""
+        schema_uris = cls.get_schema_uri()
+        if isinstance(schema_uris, list):  # Handle list of URIs
+            for uri in schema_uris:
+                if uri not in obj.stac_extensions:
+                    obj.stac_extensions.append(uri)
+        elif isinstance(schema_uris, str):  # Handle single URI
+            if schema_uris not in obj.stac_extensions:
+                obj.stac_extensions.append(schema_uris)
+        return OscExtension(obj)
+
+    def validate_extension(self) -> None:
+        """Validates that all required fields for the OSC extension are set."""
+        required_fields = ["osc:type", "osc:project", "osc:status"]
+        missing_fields = [
+            field
+            for field in required_fields
+            if self._get_property(field, None) is None
+        ]
+        if missing_fields:
+            raise ValueError(f"Missing required fields: {', '.join(missing_fields)}")
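The extension registers two schema URIs at once (OSC and CF), so `add_to` and `has_extension` treat them as a unit. A condensed usage sketch following the pattern of the unit tests earlier in this diff:

```python
import pystac
from pystac import Extent, SpatialExtent, TemporalExtent

from deep_code.utils.osc_extension import OscExtension

collection = pystac.Collection(
    id="demo-collection",
    description="Demo collection",
    extent=Extent(
        spatial=SpatialExtent([[-180, -90, 180, 90]]),
        temporal=TemporalExtent([["2022-01-01T00:00:00Z", "2023-01-01T00:00:00Z"]]),
    ),
)

ext = OscExtension.add_to(collection)  # appends both schema URIs
ext.osc_type = "product"
ext.osc_project = "deep-earth-system-data-lab"
ext.osc_status = "ongoing"
ext.validate_extension()  # raises ValueError if osc:type/project/status are missing
```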
-version = "0.1.0.dev0" \ No newline at end of file +version = "0.0.1.dev0" diff --git a/environment.yml b/environment.yml index 8e231c4..9570a8d 100644 --- a/environment.yml +++ b/environment.yml @@ -3,8 +3,17 @@ channels: - conda-forge dependencies: # Required - - pystac - - jsonschema + - python >=3.10 - click + - fsspec + - jsonschema + - requests + - pandas + - pystac - pyyaml - - requests \ No newline at end of file + - xcube + - zarr >=2.11,<3 + # test dependencies + - numpy + - pytest + - pytest-cov \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b5e1746..057f7b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools >= 61.2.0"] +requires = ["setuptools >= 61.2.0", "wheel", "build"] build-backend = "setuptools.build_meta" [project] @@ -8,7 +8,7 @@ dynamic = ["version"] authors = [ {name = "Tejas Morbagal Harish", email = "tejas.morbagalharish@brockmann-consult.de"} ] -description = """\ +description = """ deepesdl earthcode integration utility tool """ keywords = [ @@ -19,9 +19,14 @@ readme = {file = "README.md", content-type = "text/markdown"} license = {text = "MIT"} requires-python = ">=3.10" dependencies = [ - "pystac", + "click", + "fsspec", "jsonschema", - "click" + "requests", + "pandas", + "pystac", + "pyyaml", + "xcube-core" ] [tool.setuptools.dynamic] @@ -37,12 +42,17 @@ exclude = [ dev = [ "black", "flake8", + "numpy", "pytest", "pytest-cov", "pytest-recording" ] +# entry point CLI +[project.scripts] +deep-code = "deep_code.cli.main:main" + [project.urls] Repository = "https://github.com/deepesdl/deep-code" Issues = "https://github.com/deepesdl/deep-code/issues" -Changelog = "https://github.com/deepesdl/deep-code/blob/main/CHANGES.md" \ No newline at end of file +Changelog = "https://github.com/deepesdl/deep-code/blob/main/CHANGES.md"