Skip to content

Commit

Permalink
Merge branch 'process-listing'
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Jan 25, 2024
2 parents 1a402c7 + 3ccaed1 commit 2cdaff2
Show file tree
Hide file tree
Showing 30 changed files with 519 additions and 182 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/internal-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ jobs:
steps:
- name: Clone repo
uses: actions/checkout@v2
with:
submodules: recursive
- name: Set up python
uses: actions/setup-python@v4
with:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/pytest-collect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:
steps:
- name: Clone repo
uses: actions/checkout@v2
with:
submodules: recursive
- name: Set up python
uses: actions/setup-python@v4
with:
Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,12 @@ process selection options:
- Example: `--process-levels=L1,L2`.
- A level does not imply other levels, so each desired level must be specified explicitly.
For example, L2 does **not** include L1 automatically.
- `--experimental`: Enables tests for experimental processes.
By default experimental processes will be skipped.
If neither `--processes` nor `--process-levels` is specified, all processes are considered.
If both are specified, the union of both will be considered.
- `--experimental`: By default, experimental processes (or experimental process tests) are ignored.
Enabling this option will consider experimental processes and tests.
### Runner for individual process testing
Expand Down
Empty file.
122 changes: 122 additions & 0 deletions src/openeo_test_suite/lib/internal-tests/test_process_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import pytest

from openeo_test_suite.lib.process_registry import ProcessRegistry


class TestProcessRegistry:
    """Tests for listing and filtering processes through ``ProcessRegistry``."""

    # A few representative process ids per profile level
    PROCESS_EXAMPLES_L1 = ["add", "divide", "apply_dimension", "reduce_dimension"]
    PROCESS_EXAMPLES_L2 = ["aggregate_temporal", "if"]
    PROCESS_EXAMPLES_L3 = ["apply_neighborhood", "merge_cubes"]
    PROCESS_EXAMPLES_EXPERIMENTAL = ["apply_polygon"]

    @pytest.fixture(scope="class")
    def process_registry(self) -> ProcessRegistry:
        """Registry instance shared by all tests of this class (class-scoped fixture)."""
        return ProcessRegistry()

    def test_get_all_processes_basic(self, process_registry):
        """The unfiltered listing must not be empty."""
        assert len(list(process_registry.get_all_processes())) > 0

    def test_get_all_processes_add(self, process_registry):
        """Check metadata and one known test case of the ``add`` process."""
        # Unpacking fails unless exactly one process is called "add"
        (add,) = (
            proc
            for proc in process_registry.get_all_processes()
            if proc.process_id == "add"
        )

        assert add.level == "L1"
        assert add.experimental is False
        assert add.path.name == "add.json5"
        assert len(add.tests)

        expected_test = {"arguments": {"x": 0, "y": 0}, "returns": 0}
        assert expected_test in add.tests

    def test_get_all_processes_divide(self, process_registry):
        """Check metadata and the division-by-zero test case of ``divide``."""
        # Unpacking fails unless exactly one process is called "divide"
        (divide,) = (
            proc
            for proc in process_registry.get_all_processes()
            if proc.process_id == "divide"
        )

        assert divide.level == "L1"
        assert divide.experimental is False
        assert divide.path.name == "divide.json5"
        assert len(divide.tests)

        expected_test = {
            "arguments": {"x": 1, "y": 0},
            "returns": float("inf"),
            "throws": "DivisionByZero",
        }
        assert expected_test in divide.tests

    def test_get_processes_filtered_default(self, process_registry):
        """Without filters: all levels are listed, experimental processes are not."""
        pids = [proc.process_id for proc in process_registry.get_processes_filtered()]
        assert len(pids) > 100

        expected = (
            self.PROCESS_EXAMPLES_L1
            + self.PROCESS_EXAMPLES_L2
            + self.PROCESS_EXAMPLES_L3
        )
        assert set(expected).issubset(pids)
        assert set(self.PROCESS_EXAMPLES_EXPERIMENTAL).isdisjoint(pids)

    def test_get_processes_filtered_with_process_ids(self, process_registry):
        """An id allow list yields exactly the listed processes."""
        selection = process_registry.get_processes_filtered(
            process_ids=["add", "divide"]
        )
        pids = [proc.process_id for proc in selection]
        assert sorted(pids) == ["add", "divide"]

    def test_get_processes_filtered_with_process_levels(self, process_registry):
        """A level allow list only yields processes of the listed levels."""
        selection_l1 = process_registry.get_processes_filtered(process_levels=["L1"])
        pids_l1 = [proc.process_id for proc in selection_l1]
        selection_l23 = process_registry.get_processes_filtered(
            process_levels=["L2", "L3"]
        )
        pids_l23 = [proc.process_id for proc in selection_l23]

        # L1 examples: only in the L1 selection
        assert set(self.PROCESS_EXAMPLES_L1).issubset(pids_l1)
        assert set(self.PROCESS_EXAMPLES_L1).isdisjoint(pids_l23)
        # L2 and L3 examples: only in the L2+L3 selection
        for examples in (self.PROCESS_EXAMPLES_L2, self.PROCESS_EXAMPLES_L3):
            assert set(examples).isdisjoint(pids_l1)
            assert set(examples).issubset(pids_l23)
        # Experimental examples: in neither selection
        assert set(self.PROCESS_EXAMPLES_EXPERIMENTAL).isdisjoint(pids_l1)
        assert set(self.PROCESS_EXAMPLES_EXPERIMENTAL).isdisjoint(pids_l23)

    def test_get_processes_filtered_with_process_ids_and_levels(self, process_registry):
        """Id and level allow lists combine as a union."""
        selection = process_registry.get_processes_filtered(
            process_ids=["min", "max"], process_levels=["L2"]
        )
        pids = [proc.process_id for proc in selection]

        assert set(["min", "max"] + self.PROCESS_EXAMPLES_L2).issubset(pids)
        excluded = (
            self.PROCESS_EXAMPLES_L1
            + self.PROCESS_EXAMPLES_L3
            + self.PROCESS_EXAMPLES_EXPERIMENTAL
        )
        assert set(excluded).isdisjoint(pids)

    def test_get_processes_filtered_with_experimental(self, process_registry):
        """With ``experimental=True`` experimental processes are listed as well."""
        selection = process_registry.get_processes_filtered(
            process_ids=["min", "max"], process_levels=["L3"], experimental=True
        )
        pids = [proc.process_id for proc in selection]

        assert set(["min", "max"] + self.PROCESS_EXAMPLES_L3).issubset(pids)
        assert set(self.PROCESS_EXAMPLES_EXPERIMENTAL).issubset(pids)
20 changes: 20 additions & 0 deletions src/openeo_test_suite/lib/internal-tests/test_process_selection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from openeo_test_suite.lib.process_selection import csv_to_list


def test_csv_to_list():
    """Empty input gives an empty list; items are stripped and blanks are dropped."""
    assert csv_to_list() == []

    # (input text, expected list) pairs
    cases = [
        ("", []),
        (" ", []),
        (" , ", []),
        ("foo", ["foo"]),
        ("foo,bar,baz", ["foo", "bar", "baz"]),
        (",foo,bar,baz,", ["foo", "bar", "baz"]),
        (" ,foo , bar, baz , ", ["foo", "bar", "baz"]),
        (" ,foo ,,, bar, , baz , ", ["foo", "bar", "baz"]),
    ]
    for text, expected in cases:
        assert csv_to_list(text) == expected


def test_csv_to_list_none_on_empty():
    """With ``none_on_empty=True`` an empty result is reported as ``None``."""
    assert csv_to_list(none_on_empty=True) is None
    for blank in ("", " ", " , "):
        assert csv_to_list(blank, none_on_empty=True) is None
44 changes: 44 additions & 0 deletions src/openeo_test_suite/lib/internal-tests/test_skipping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import openeo
import pytest
from openeo import DataCube

from openeo_test_suite.lib.skipping import extract_processes_from_process_graph


def test_extract_processes_from_process_graph_basic():
    """A flat graph with a single ``add`` node yields just ``add``."""
    add_node = {"process_id": "add", "arguments": {"x": 3, "y": 5}, "result": True}
    found = extract_processes_from_process_graph({"add35": add_node})
    assert found == {"add"}


@pytest.fixture
def s2_cube() -> openeo.DataCube:
    """Data cube loading bands B02 and B03 of collection "S2", built without a connection."""
    return openeo.DataCube.load_collection(
        collection_id="S2",
        bands=["B02", "B03"],
        fetch_metadata=False,
        connection=None,
    )


def test_extract_processes_from_process_graph_cube_simple(s2_cube):
    """A plain ``load_collection`` cube only involves ``load_collection``."""
    found = extract_processes_from_process_graph(s2_cube)
    assert found == {"load_collection"}


def test_extract_processes_from_process_graph_cube_reduce_temporal(s2_cube):
    """The reducer process (``mean``) must be picked up next to the outer processes."""
    expected = {"load_collection", "reduce_dimension", "mean"}
    reduced = s2_cube.reduce_temporal("mean")
    assert extract_processes_from_process_graph(reduced) == expected


def test_extract_processes_from_process_graph_cube_reduce_bands(s2_cube):
    """Band math must expose the arithmetic and band-selection processes it uses."""
    band_02 = s2_cube.band("B02")
    band_03 = s2_cube.band("B03")
    # Normalized difference of the two bands
    cube = (band_03 - band_02) / (band_03 + band_02)

    expected = {
        "load_collection",
        "reduce_dimension",
        "array_element",
        "subtract",
        "add",
        "divide",
    }
    assert extract_processes_from_process_graph(cube) == expected
112 changes: 112 additions & 0 deletions src/openeo_test_suite/lib/process_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable, Iterator, List, Optional, Union

import json5

import openeo_test_suite

_log = logging.getLogger(__name__)


@dataclass(frozen=True)
class ProcessData:
    """Process data, including profile level and list of tests"""

    # Process identifier (the "id" entry of the definition file)
    process_id: str
    # Profile level (e.g. "L1"); None when the definition file has no "level" entry
    level: Optional[str]
    # Test cases as listed in the definition file
    tests: List[dict]  # TODO: also make dataclass for each test?
    # Whether the process is flagged as experimental
    experimental: bool
    # Path of the definition file this data was loaded from
    path: Path


class ProcessRegistry:
    """
    Registry of processes, metadata (level, experimental flag)
    and related tests defined in openeo-processes project
    """

    def __init__(self, root: Optional[Path] = None):
        """
        :param root: Root directory of the tests folder in openeo-processes project.
            When not given, env var ``OPENEO_TEST_SUITE_PROCESSES_TEST_ROOT`` is used
            and, failing that, a couple of candidate locations are tried.
        :raises ValueError: when no root is given or configured
            and none of the candidate locations is a directory.
        """
        self._root = Path(
            root
            # TODO: eliminate need for this env var?
            or os.environ.get("OPENEO_TEST_SUITE_PROCESSES_TEST_ROOT")
            or self._guess_root()
        )
        # Lazy load cache
        self._processes: Optional[List[ProcessData]] = None

    def _guess_root(self) -> Path:
        """
        Find the processes test root among a few candidate locations.

        :return: first candidate that is an existing directory
        :raises ValueError: when none of the candidates is a directory
        """
        # TODO: avoid need for guessing and properly include assets in (installed) package
        project_root = Path(openeo_test_suite.__file__).parents[2]
        candidates = [
            project_root / "assets/processes/tests",
            Path("./assets/processes/tests"),
            Path("./openeo-test-suite/assets/processes/tests"),
        ]
        for candidate in candidates:
            # `is_dir()` is already False for non-existing paths
            if candidate.is_dir():
                return candidate
        raise ValueError(
            f"Could not find valid processes test root directory (tried {candidates})"
        )

    def _load(self) -> Iterator[ProcessData]:
        """
        Collect all processes from the ``*.json5`` files in the root directory.

        Files that fail to load (or whose "id" does not match the file name)
        are logged and skipped.

        :raises ValueError: when the root is not a directory
        """
        # TODO: cache or preload this in __init__? Or even reuse across instances?
        if not self._root.is_dir():
            raise ValueError(f"Invalid process test root directory: {self._root}")
        _log.info("Loading process definitions from %s", self._root)
        # Sort so that the listing order does not depend on the file system
        for path in sorted(self._root.glob("*.json5")):
            try:
                with path.open(encoding="utf-8") as f:
                    data = json5.load(f)
                process_id = data["id"]
                # Explicit check instead of `assert`, which is stripped under `python -O`
                if process_id != path.stem:
                    raise ValueError(
                        f"Process id {process_id!r} does not match file name {path.name!r}"
                    )
                process_data = ProcessData(
                    process_id=process_id,
                    level=data.get("level"),
                    tests=data.get("tests", []),
                    experimental=data.get("experimental", False),
                    path=path,
                )
            except Exception as e:
                # TODO: good idea to skip broken definitions? Why not just fail hard?
                _log.error("Failed to load process data from %s: %r", path, e)
                continue
            # Yield outside of the `try` so only loading problems are swallowed
            yield process_data

    def get_all_processes(self) -> Iterable[ProcessData]:
        """Iterate over all processes (loaded on first use, cached afterwards)."""
        if self._processes is None:
            self._processes = list(self._load())
        return iter(self._processes)

    def get_processes_filtered(
        self,
        process_ids: Optional[List[str]] = None,
        process_levels: Optional[List[str]] = None,
        experimental: bool = False,
    ) -> Iterable[ProcessData]:
        """
        Collect processes matching with additional filtering:
        :param process_ids: allow list of process ids (empty/None means allow all)
        :param process_levels: allow list of process levels (empty/None means allow all)
        :param experimental: allow experimental processes or not?

        When both allow lists are given, a process matching either one is included.
        """
        # Build the lookup sets once instead of scanning the lists per process
        allowed_ids = set(process_ids or [])
        allowed_levels = set(process_levels or [])
        # No id or level allow lists: no filtering
        allow_all = not allowed_ids and not allowed_levels

        for process_data in self.get_all_processes():
            if process_data.experimental and not experimental:
                continue
            if (
                allow_all
                or process_data.process_id in allowed_ids
                or process_data.level in allowed_levels
            ):
                yield process_data
Loading

0 comments on commit 2cdaff2

Please sign in to comment.