diff --git a/.github/workflows/integrate.yaml b/.github/workflows/integrate.yaml
index 49516e5..8e153ab 100644
--- a/.github/workflows/integrate.yaml
+++ b/.github/workflows/integrate.yaml
@@ -57,7 +57,7 @@ jobs:
           provider: microk8s
           channel: 1.25-strict/stable
           juju-channel: 3.1/stable
-          microk8s-addons: "dns hostpath-storage rbac"
+          microk8s-addons: "dns hostpath-storage rbac metallb:10.64.140.43-10.64.140.49"

       - name: Test
         run: sg snap_microk8s -c "tox -vve integration -- --model testing"
@@ -71,19 +71,27 @@ jobs:
         if: failure()

       - name: Get envoy workload logs
-        run: kubectl logs --tail 100 -nci-test -ljuju-app=envoy
+        run: kubectl logs --tail 100 -ntesting -ljuju-app=envoy
         if: failure()

       - name: Get envoy operator logs
-        run: kubectl logs --tail 100 -nci-test -ljuju-operator=envoy
+        run: kubectl logs --tail 100 -ntesting -ljuju-operator=envoy
         if: failure()

       - name: Get mlmd workload logs
-        run: kubectl logs --tail 100 -nci-test -ljuju-app=mlmd
+        run: kubectl logs --tail 100 -ntesting -ljuju-app=mlmd
         if: failure()

       - name: Get mlmd operator logs
-        run: kubectl logs --tail 100 -nci-test -ljuju-operator=mlmd
+        run: kubectl logs --tail 100 -ntesting -ljuju-operator=mlmd
+        if: failure()
+
+      - name: Get istio-pilot workload logs
+        run: kubectl logs --tail 100 -ntesting -ljuju-app=istio-pilot
+        if: failure()
+
+      - name: Get istio-pilot operator logs
+        run: kubectl logs --tail 100 -ntesting -ljuju-operator=istio-pilot
         if: failure()

       - name: Collect charm debug artifacts
diff --git a/metadata.yaml b/metadata.yaml
index 67e53ee..6fb9dce 100755
--- a/metadata.yaml
+++ b/metadata.yaml
@@ -25,3 +25,43 @@ requires:
     interface: grpc
     schema: https://raw.githubusercontent.com/canonical/operator-schemas/master/grpc.yaml
     versions: [v1]
+  ingress:
+    interface: ingress
+    schema:
+      v2:
+        requires:
+          type: object
+          properties:
+            service:
+              type: string
+            port:
+              type: integer
+            namespace:
+              type: string
+            prefix:
+              type: string
+            rewrite:
+              type: string
+          required:
+          - service
+          - port
+          - namespace
+          - prefix
+      v1:
+        requires:
+          type: object
+          properties:
+            service:
+              type: string
+            port:
+              type: integer
+            prefix:
+              type: string
+            rewrite:
+              type: string
+          required:
+          - service
+          - port
+          - prefix
+    versions: [v1]
+    __schema_source: https://raw.githubusercontent.com/canonical/operator-schemas/master/ingress.yaml
diff --git a/requirements-integration.in b/requirements-integration.in
index 586ff18..884b346 100644
--- a/requirements-integration.in
+++ b/requirements-integration.in
@@ -2,4 +2,7 @@ juju
 pytest-operator
 selenium
 selenium-wire
+lightkube
+aiohttp
+tenacity
 -r requirements.txt
diff --git a/requirements-integration.txt b/requirements-integration.txt
index 9429c3b..9b208eb 100644
--- a/requirements-integration.txt
+++ b/requirements-integration.txt
@@ -4,11 +4,20 @@
 #
 #    pip-compile requirements-integration.in
 #
+aiohttp==3.8.6
+    # via -r requirements-integration.in
+aiosignal==1.3.1
+    # via aiohttp
+anyio==4.0.0
+    # via httpcore
 asttokens==2.4.0
     # via stack-data
+async-timeout==4.0.3
+    # via aiohttp
 attrs==23.1.0
     # via
     #   -r requirements.txt
+    #   aiohttp
     #   jsonschema
     #   outcome
     #   trio
@@ -29,6 +38,8 @@ cachetools==5.3.1
 certifi==2023.7.22
     # via
     #   -r requirements.txt
+    #   httpcore
+    #   httpx
     #   kubernetes
     #   requests
     #   selenium
@@ -40,6 +51,7 @@ cffi==1.16.0
 charset-normalizer==3.3.0
     # via
     #   -r requirements.txt
+    #   aiohttp
     #   requests
 cryptography==41.0.4
     # via
@@ -53,11 +65,16 @@ envoy-data-plane==0.2.5
     # via -r requirements.txt
 exceptiongroup==1.1.3
     # via
+    #   anyio
     #   pytest
     #   trio
     #   trio-websocket
 executing==2.0.0
     # via stack-data
+frozenlist==1.4.0
+    # via
+    #   aiohttp
+    #   aiosignal
 google-auth==2.23.3
     # via kubernetes
 grpclib==0.4.6
@@ -65,7 +82,9 @@ grpclib==0.4.6
     #   -r requirements.txt
     #   betterproto
 h11==0.14.0
-    # via wsproto
+    # via
+    #   httpcore
+    #   wsproto
 h2==4.1.0
     # via
     #   -r requirements.txt
@@ -75,6 +94,10 @@ hpack==4.0.0
     # via
     #   -r requirements.txt
     #   h2
+httpcore==0.18.0
+    # via httpx
+httpx==0.25.0
+    # via lightkube
 hvac==1.2.1
     # via juju
 hyperframe==6.0.1
@@ -85,8 +108,11 @@ hyperframe==6.0.1
 idna==3.4
     # via
     #   -r requirements.txt
+    #   anyio
+    #   httpx
     #   requests
     #   trio
+    #   yarl
 importlib-resources==6.1.0
     # via
     #   -r requirements.txt
@@ -113,6 +139,10 @@ kaitaistruct==0.10
     # via selenium-wire
 kubernetes==27.2.0
     # via juju
+lightkube==0.14.0
+    # via -r requirements-integration.in
+lightkube-models==1.28.1.4
+    # via lightkube
 macaroonbakery==1.3.1
     # via juju
 markupsafe==2.1.3
@@ -122,7 +152,9 @@ matplotlib-inline==0.1.6
 multidict==6.0.4
     # via
     #   -r requirements.txt
+    #   aiohttp
     #   grpclib
+    #   yarl
 mypy-extensions==1.0.0
     # via typing-inspect
 oauthlib==3.2.2
@@ -218,6 +250,7 @@ pyyaml==6.0.1
     #   -r requirements.txt
     #   juju
     #   kubernetes
+    #   lightkube
     #   ops
     #   pytest-operator
     #   serialized-data-interface
@@ -251,13 +284,19 @@ six==1.16.0
     #   pymacaroons
     #   python-dateutil
 sniffio==1.3.0
-    # via trio
+    # via
+    #   anyio
+    #   httpcore
+    #   httpx
+    #   trio
 sortedcontainers==2.4.0
     # via trio
 stack-data==0.6.3
     # via ipython
 stringcase==1.2.0
     # via -r requirements.txt
+tenacity==8.2.3
+    # via -r requirements-integration.in
 tomli==2.0.1
     # via
     #   ipdb
@@ -299,6 +338,8 @@ wsproto==1.2.0
     # via
     #   selenium-wire
     #   trio-websocket
+yarl==1.9.2
+    # via aiohttp
 zipp==3.17.0
     # via
     #   -r requirements.txt
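Note on the new `ingress` relation in metadata.yaml: the charm advertises only v1 (`versions: [v1]`), so the payload it publishes must carry `service`, `port`, and `prefix`, with `rewrite` optional. A minimal sketch of checking a payload against that v1 `requires` block; the `jsonschema` package and the example values are assumptions, not part of this PR:

```python
# Hypothetical check of an ingress v1 payload against the schema added in metadata.yaml.
# Assumes the `jsonschema` package; the payload mirrors what src/charm.py sends below.
from jsonschema import validate

INGRESS_V1_REQUIRES = {
    "type": "object",
    "properties": {
        "service": {"type": "string"},
        "port": {"type": "integer"},
        "prefix": {"type": "string"},
        "rewrite": {"type": "string"},
    },
    "required": ["service", "port", "prefix"],
}

payload = {
    "prefix": "/ml_metadata",
    "rewrite": "/ml_metadata",
    "service": "envoy",  # example app name
    "port": 9090,        # example http-port value
}

validate(instance=payload, schema=INGRESS_V1_REQUIRES)  # raises ValidationError if invalid
```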
diff --git a/src/charm.py b/src/charm.py
index 20adee0..2249987 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -152,6 +152,7 @@ def __init__(self, *args):
             self.on.leader_elected,
             self.on["grpc"].relation_changed,
             self.on["grpc-web"].relation_changed,
+            self.on["ingress"].relation_changed,
         ]:
             self.framework.observe(event, self.set_pod_spec)

@@ -166,6 +167,9 @@ def set_pod_spec(self, event):
             upstreams = self._check_grpc(interfaces)

             self._send_info(interfaces)
+
+            self._send_data_to_ingress_provider(interfaces)
+
         except CheckFailed as check_failed:
             self.model.unit.status = check_failed.status
             return
@@ -276,6 +280,27 @@ def _check_grpc(self, interfaces):
             raise CheckFailed("Waiting for upstream gRPC connection information.", WaitingStatus)
         return upstreams

+    def _send_data_to_ingress_provider(self, interfaces):
+        """Send data to the ingress relation data bag so the ingress provider configures
+        a VirtualService routing traffic from the `/ml_metadata` path to the envoy service.
+
+        Raises CheckFailed, setting the charm to Blocked, if no ingress relation is available.
+        """
+        if interfaces["ingress"]:
+            interfaces["ingress"].send_data(
+                {
+                    "prefix": "/ml_metadata",
+                    "rewrite": "/ml_metadata",
+                    "service": self.model.app.name,
+                    "port": int(self.model.config["http-port"]),
+                }
+            )
+        else:
+            raise CheckFailed(
+                "Please relate to istio-pilot, no ingress relation available.",
+                BlockedStatus,
+            )
+

 if __name__ == "__main__":
     main(Operator)
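For reference, `send_data` here comes from serialized-data-interface, which serializes the dict into the app relation data bag as YAML under a `data` key (the unit-test helpers below follow the same convention for the grpc relation). Roughly, and with illustrative values standing in for the app name and `http-port`, the published bag would look like this sketch:

```python
# Illustrative shape of the app data bag after send_data(); not code run by the charm.
# "envoy" and 9090 stand in for self.model.app.name and config["http-port"].
import yaml

app_data = {
    "_supported_versions": "- v1",
    "data": yaml.dump(
        {
            "prefix": "/ml_metadata",
            "rewrite": "/ml_metadata",
            "service": "envoy",
            "port": 9090,
        }
    ),
}
print(app_data["data"])  # the YAML document istio-pilot reads to build the VirtualService
```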
diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py
index 986015e..3c8a3d5 100644
--- a/tests/integration/test_charm.py
+++ b/tests/integration/test_charm.py
@@ -5,9 +5,14 @@ import logging
 from pathlib import Path

+import aiohttp
 import pytest
 import requests
+import tenacity
 import yaml
+from lightkube import Client
+from lightkube.generic_resource import create_namespaced_resource
+from lightkube.resources.core_v1 import Service

 log = logging.getLogger(__name__)

@@ -20,6 +25,14 @@
 GRAFANA = "grafana-k8s"
 PROMETHEUS_SCRAPE = "prometheus-scrape-config-k8s"
 MLMD = "mlmd"
+ISTIO_PILOT = "istio-pilot"
+ISTIO_GW = "istio-ingressgateway"
+
+
+@pytest.fixture(scope="session")
+def lightkube_client() -> Client:
+    client = Client(field_manager=APP_NAME)
+    return client


 @pytest.mark.abort_on_fail
@@ -30,13 +43,55 @@ async def test_build_and_deploy(ops_test):
     resources = {"oci-image": image_path}

     await ops_test.model.deploy(charm, resources=resources)
     await ops_test.model.add_relation(APP_NAME, MLMD)
-    await ops_test.model.wait_for_idle(status="active", raise_on_blocked=True, idle_period=30)
+    await ops_test.model.wait_for_idle(
+        apps=[MLMD], status="active", raise_on_blocked=False, idle_period=30
+    )
+    await ops_test.model.wait_for_idle(
+        apps=[APP_NAME], status="blocked", raise_on_blocked=False, idle_period=30
+    )

     relation = ops_test.model.relations[0]
     assert [app.entity_id for app in relation.applications] == [APP_NAME, MLMD]
     assert all([endpoint.name == "grpc" for endpoint in relation.endpoints])


+@pytest.mark.abort_on_fail
+async def test_virtual_service(ops_test, lightkube_client):
+    await ops_test.model.deploy(
+        ISTIO_PILOT,
+        channel="latest/edge",
+        config={"default-gateway": "kubeflow-gateway"},
+        trust=True,
+    )
+
+    await ops_test.model.deploy(
+        "istio-gateway",
+        application_name=ISTIO_GW,
+        channel="latest/edge",
+        config={"kind": "ingress"},
+        trust=True,
+    )
+    await ops_test.model.add_relation(f"{ISTIO_PILOT}:{ISTIO_PILOT}", f"{ISTIO_GW}:{ISTIO_PILOT}")
+    await ops_test.model.add_relation(ISTIO_PILOT, APP_NAME)
+
+    await ops_test.model.wait_for_idle(
+        status="active",
+        raise_on_blocked=False,
+        raise_on_error=True,
+        timeout=300,
+    )
+
+    # Verify that the VirtualService exists as expected
+    assert_virtualservice_exists(
+        name=APP_NAME, namespace=ops_test.model.name, lightkube_client=lightkube_client
+    )
+
+    # Verify the `/ml_metadata` endpoint is served
+    await assert_metadata_endpoint_is_served(ops_test, lightkube_client=lightkube_client)
+
+    await assert_grpc_web_protocol_responds(ops_test, lightkube_client=lightkube_client)
+
+
 @pytest.mark.abort_on_fail
 async def test_deploy_with_prometheus_and_grafana(ops_test):
     scrape_config = {"scrape_interval": "30s"}
@@ -74,3 +129,70 @@ async def test_correct_observability_setup(ops_test):
     assert response_metric["juju_charm"] == APP_NAME
     assert response_metric["juju_model"] == ops_test.model_name
     assert response_metric["juju_unit"] == f"{APP_NAME}/0"
+
+
+def assert_virtualservice_exists(name: str, namespace: str, lightkube_client):
+    """Assert the VirtualService exists; raises an ApiError(404) if it does not."""
+    log.info(f"Asserting that VirtualService '{name}' exists.")
+    virtual_service_lightkube_resource = create_namespaced_resource(
+        group="networking.istio.io",
+        version="v1alpha3",
+        kind="VirtualService",
+        plural="virtualservices",
+    )
+    lightkube_client.get(virtual_service_lightkube_resource, name, namespace=namespace)
+    log.info(f"VirtualService '{name}' exists.")
+
+
+@tenacity.retry(
+    stop=tenacity.stop_after_delay(10),
+    wait=tenacity.wait_fixed(2),
+    reraise=True,
+)
+async def assert_metadata_endpoint_is_served(ops_test, lightkube_client):
+    regular_ingress_gateway_ip = await get_gateway_ip(
+        namespace=ops_test.model.name, lightkube_client=lightkube_client
+    )
+    res_status, res_text = await fetch_response(f"http://{regular_ingress_gateway_ip}/ml_metadata")
+    assert res_status != 404
+    log.info("Endpoint /ml_metadata is reachable.")
+
+
+@tenacity.retry(
+    stop=tenacity.stop_after_delay(10),
+    wait=tenacity.wait_fixed(2),
+    reraise=True,
+)
+async def assert_grpc_web_protocol_responds(ops_test, lightkube_client):
+    regular_ingress_gateway_ip = await get_gateway_ip(
+        namespace=ops_test.model.name, lightkube_client=lightkube_client
+    )
+    headers = {"Content-Type": "application/grpc-web-text"}
+    res_status, res_headers = await fetch_response(
+        f"http://{regular_ingress_gateway_ip}/ml_metadata", headers
+    )
+    assert res_status == 200
+    log.info("Endpoint /ml_metadata serves the grpc-web protocol.")
+
+
+async def fetch_response(url, headers=None):
+    """Fetch the provided URL; return (status, text), or (status, headers) when headers are sent."""
+
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url, headers=headers) as response:
+            result_status = response.status
+            result_text = await response.text()
+            result_headers = response.headers
+    if headers is None:
+        return result_status, str(result_text)
+    else:
+        return result_status, result_headers
+
+
+async def get_gateway_ip(
+    namespace: str, lightkube_client, service_name: str = "istio-ingressgateway-workload"
+):
+    log.info(f"Getting {service_name} ingress IP.")
+    service = lightkube_client.get(Service, service_name, namespace=namespace)
+    gateway_ip = service.status.loadBalancer.ingress[0].ip
+    return gateway_ip
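When a debugging session needs more than the existence check above, the same lightkube generic resource can be used to inspect the routes istio-pilot rendered. A sketch, assuming the usual networking.istio.io/v1alpha3 layout and example app/model names:

```python
# Sketch: dump the HTTP routes of the VirtualService created for the charm.
# "envoy" and "testing" are example app/model names, not fixed by this PR.
from lightkube import Client
from lightkube.generic_resource import create_namespaced_resource

VirtualService = create_namespaced_resource(
    group="networking.istio.io",
    version="v1alpha3",
    kind="VirtualService",
    plural="virtualservices",
)

client = Client()
vs = client.get(VirtualService, "envoy", namespace="testing")
for route in vs.spec["http"]:  # generic resources expose spec as a dict
    print(route.get("match"), "->", route.get("route"))
```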
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index 5391520..b0a5f17 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -29,14 +29,8 @@ def test_missing_image(harness):

 def test_no_relation(harness):
     harness.set_leader(True)
-    harness.add_oci_resource(
-        "oci-image",
-        {
-            "registrypath": "ci-test",
-            "username": "",
-            "password": "",
-        },
-    )
+    add_oci_image(harness)
+
     harness.begin_with_initial_hooks()

     assert harness.charm.model.unit.status == BlockedStatus("No upstream gRPC services.")
@@ -44,6 +38,40 @@

 def test_many_relations(harness):
     harness.set_leader(True)
+    add_oci_image(harness)
+
+    setup_grpc_relation(harness, "grpc-one", "8080")
+    setup_grpc_relation(harness, "grpc-two", "9090")
+    # Relate to ingress so the charm does not go to Blocked
+    setup_ingress_relation(harness)
+    harness.begin_with_initial_hooks()
+
+    pod_spec, _ = harness.get_pod_spec()
+
+    expected = yaml.safe_load(open("tests/unit/many_relations.yaml"))
+
+    c = pod_spec["containers"][0]["volumeConfig"][0]["files"][0]["content"]
+    assert json.loads(c) == expected
+
+    assert harness.charm.model.unit.status == ActiveStatus("")
+
+
+def test_with_ingress_relation(harness):
+    harness.set_leader(True)
+    add_oci_image(harness)
+
+    # Set up the required grpc relation
+    setup_grpc_relation(harness, "grpc-one", "8080")
+
+    setup_ingress_relation(harness)
+
+    harness.begin_with_initial_hooks()
+
+    assert isinstance(harness.charm.model.unit.status, ActiveStatus)
+
+
+# Helper functions
+def add_oci_image(harness: Harness):
     harness.add_oci_resource(
         "oci-image",
         {
@@ -52,34 +80,28 @@
             "password": "",
         },
     )
-    rel_id1 = harness.add_relation("grpc", "grpc-one")
-    harness.add_relation_unit(rel_id1, "grpc-one/0")
+
+
+def setup_ingress_relation(harness: Harness):
+    rel_id = harness.add_relation("ingress", "istio-pilot")
+    harness.add_relation_unit(rel_id, "istio-pilot/0")
     harness.update_relation_data(
-        rel_id1,
-        "grpc-one",
-        {
-            "_supported_versions": "- v1",
-            "data": yaml.dump({"service": "grpc-one", "port": "8080"}),
-        },
+        rel_id,
+        "istio-pilot",
+        {"_supported_versions": "- v1"},
     )
+    return rel_id

-    rel_id2 = harness.add_relation("grpc", "grpc-two")
-    harness.add_relation_unit(rel_id2, "grpc-two/0")
+
+def setup_grpc_relation(harness: Harness, name: str, port: str):
+    rel_id = harness.add_relation("grpc", name)
+    harness.add_relation_unit(rel_id, f"{name}/0")
     harness.update_relation_data(
-        rel_id2,
-        "grpc-two",
+        rel_id,
+        name,
         {
             "_supported_versions": "- v1",
-            "data": yaml.dump({"service": "grpc-two", "port": "9090"}),
+            "data": yaml.dump({"service": name, "port": port}),
         },
     )
-    harness.begin_with_initial_hooks()
-
-    pod_spec, _ = harness.get_pod_spec()
-
-    expected = yaml.safe_load(open("tests/unit/many_relations.yaml"))
-
-    c = pod_spec["containers"][0]["volumeConfig"][0]["files"][0]["content"]
-    assert json.loads(c) == expected
-
-    assert harness.charm.model.unit.status == ActiveStatus("")
+    return rel_id
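With these helpers in place, a natural follow-up (not included in this PR) would be asserting the payload the charm publishes over the ingress relation. A sketch, relying on the same YAML-under-`data` convention the grpc helper above uses:

```python
# Hypothetical extra unit test: check the data the charm sent to istio-pilot.
import yaml


def test_ingress_relation_data(harness):
    harness.set_leader(True)
    add_oci_image(harness)
    setup_grpc_relation(harness, "grpc-one", "8080")
    rel_id = setup_ingress_relation(harness)
    harness.begin_with_initial_hooks()

    # App data bag as published by serialized-data-interface (SDI).
    sent = harness.get_relation_data(rel_id, harness.charm.app.name)
    data = yaml.safe_load(sent["data"])
    assert data["prefix"] == "/ml_metadata"
    assert data["service"] == harness.charm.app.name
```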