Skip to content

Commit

Permalink
[DPE-4985] Create fixture for HA cluster set up + break HA tests into separate groups (#475)
Browse files (browse the repository at this point in the history)

* Create fixture for HA cluster set up + break HA tests into separate groups

* Update outdated charm libs

* Use dump plugin instead of nil to prime the scripts directory

* Update outdated charm libs

* Use standard way of priming directories in charm
  • Loading branch information
shayancanonical committed Aug 13, 2024
1 parent 88d684e commit 0934aba
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 76 deletions.
7 changes: 5 additions & 2 deletions charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ bases:
channel: "22.04"
architectures: [arm64]
parts:
files:
plugin: dump
source: .
prime:
- scripts
charm:
override-pull: |
craftctl default
Expand All @@ -25,5 +30,3 @@ parts:
- pkg-config
- rustc
- cargo
prime:
- scripts
22 changes: 22 additions & 0 deletions tests/integration/high_availability/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
from .. import juju_
from .high_availability_helpers import (
APPLICATION_DEFAULT_APP_NAME,
deploy_and_scale_application,
deploy_and_scale_mysql,
deploy_chaos_mesh,
destroy_chaos_mesh,
relate_mysql_and_application,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -53,3 +56,22 @@ def built_charm(ops_test: OpsTest) -> pathlib.Path:
charms_dst_dir = ops_test.tmp_path / "charms"
packed_charm = list(charms_dst_dir.glob("*.charm"))
return packed_charm[0].resolve(strict=True)


@pytest.fixture()
async def highly_available_cluster(ops_test: OpsTest) -> None:
"""Run the set up for high availability tests.
Args:
ops_test: The ops test framework
"""
logger.info("Deploying mysql-k8s and scaling to 3 units")
mysql_application_name = await deploy_and_scale_mysql(ops_test)

logger.info("Deploying mysql-test-app")
application_name = await deploy_and_scale_application(ops_test)

logger.info("Relating mysql-k8s with mysql-test-app")
await relate_mysql_and_application(ops_test, mysql_application_name, application_name)

yield
20 changes: 1 addition & 19 deletions tests/integration/high_availability/high_availability_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import subprocess
import tempfile
from pathlib import Path
from typing import List, Optional, Tuple
from typing import List, Optional

import kubernetes
import lightkube
Expand Down Expand Up @@ -287,24 +287,6 @@ def destroy_chaos_mesh(namespace: str) -> None:
)


async def high_availability_test_setup(ops_test: OpsTest) -> Tuple[str, str]:
"""Run the set up for high availability tests.
Args:
ops_test: The ops test framework
"""
logger.info("Deploying mysql-k8s and scaling to 3 units")
mysql_application_name = await deploy_and_scale_mysql(ops_test)

logger.info("Deploying mysql-test-app")
application_name = await deploy_and_scale_application(ops_test)

logger.info("Relating mysql-k8s with mysql-test-app")
await relate_mysql_and_application(ops_test, mysql_application_name, application_name)

return mysql_application_name, application_name


async def send_signal_to_pod_container_process(
model_name: str, unit_name: str, container_name: str, process: str, signal_code: str
) -> None:
Expand Down
16 changes: 5 additions & 11 deletions tests/integration/high_availability/test_node_drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
ensure_all_units_continuous_writes_incrementing,
ensure_n_online_mysql_members,
evict_pod,
get_application_name,
get_pod,
get_pod_pvcs,
get_pod_pvs,
high_availability_test_setup,
)

logger = logging.getLogger(__name__)
Expand All @@ -28,18 +28,12 @@


@pytest.mark.group(1)
@pytest.mark.skip_if_deployed
@pytest.mark.abort_on_fail
async def test_build_and_deploy(ops_test: OpsTest) -> None:
"""Simple test to ensure that the mysql and application charms get deployed."""
await high_availability_test_setup(ops_test)


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_pod_eviction_and_pvc_deletion(ops_test: OpsTest, continuous_writes) -> None:
async def test_pod_eviction_and_pvc_deletion(
ops_test: OpsTest, highly_available_cluster, continuous_writes
) -> None:
"""Test behavior when node drains - pod is evicted and pvs are rotated."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

logger.info("Waiting until 3 mysql instances are online")
# ensure all units in the cluster are online
Expand Down
38 changes: 18 additions & 20 deletions tests/integration/high_availability/test_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
deploy_and_scale_mysql,
ensure_all_units_continuous_writes_incrementing,
ensure_n_online_mysql_members,
high_availability_test_setup,
get_application_name,
insert_data_into_mysql_and_validate_replication,
)

Expand All @@ -32,17 +32,13 @@
TIMEOUT = 15 * 60


@pytest.mark.group(1)
async def test_build_and_deploy(ops_test: OpsTest) -> None:
"""Simple test to ensure that the mysql and application charms get deployed."""
await high_availability_test_setup(ops_test)


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_check_consistency(ops_test: OpsTest, continuous_writes) -> None:
async def test_check_consistency(
ops_test: OpsTest, highly_available_cluster, continuous_writes
) -> None:
"""Test to write to primary, and read the same data back from replicas."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

# assert that there are 3 units in the mysql cluster
assert len(ops_test.model.applications[mysql_application_name].units) == 3
Expand All @@ -54,11 +50,13 @@ async def test_check_consistency(ops_test: OpsTest, continuous_writes) -> None:
await ensure_all_units_continuous_writes_incrementing(ops_test)


@pytest.mark.group(1)
@pytest.mark.group(2)
@pytest.mark.abort_on_fail
async def test_no_replication_across_clusters(ops_test: OpsTest, continuous_writes) -> None:
async def test_no_replication_across_clusters(
ops_test: OpsTest, highly_available_cluster, continuous_writes
) -> None:
"""Test to ensure that writes to one cluster do not replicate to another cluster."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

# assert that there are 3 units in the mysql cluster
assert len(ops_test.model.applications[mysql_application_name].units) == 3
Expand Down Expand Up @@ -112,15 +110,15 @@ async def test_no_replication_across_clusters(ops_test: OpsTest, continuous_writ
await clean_up_database_and_table(ops_test, database_name, table_name)


@pytest.mark.group(1)
@pytest.mark.group(3)
@pytest.mark.abort_on_fail
async def test_scaling_without_data_loss(ops_test: OpsTest) -> None:
async def test_scaling_without_data_loss(ops_test: OpsTest, highly_available_cluster) -> None:
"""Test to ensure that data is preserved when a unit is scaled up and then down.
Ensures that there are no running continuous writes as the extra data in the
database makes scaling up slower.
"""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

# assert that there are 3 units in the mysql cluster
assert len(ops_test.model.applications[mysql_application_name].units) == 3
Expand Down Expand Up @@ -191,13 +189,13 @@ async def test_scaling_without_data_loss(ops_test: OpsTest) -> None:
await clean_up_database_and_table(ops_test, database_name, table_name)


# TODO: move test immediately after "test_build_and_deploy" once the following issue is resolved
# https://github.com/canonical/mysql-k8s-operator/issues/102
@pytest.mark.group(1)
@pytest.mark.group(4)
@pytest.mark.abort_on_fail
async def test_kill_primary_check_reelection(ops_test: OpsTest, continuous_writes) -> None:
async def test_kill_primary_check_reelection(
ops_test: OpsTest, highly_available_cluster, continuous_writes
) -> None:
"""Test to kill the primary under load and ensure re-election of primary."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

await ensure_all_units_continuous_writes_incrementing(ops_test)

Expand Down
50 changes: 26 additions & 24 deletions tests/integration/high_availability/test_self_healing.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
ensure_all_units_continuous_writes_incrementing,
ensure_n_online_mysql_members,
ensure_process_not_running,
get_application_name,
get_process_stat,
high_availability_test_setup,
insert_data_into_mysql_and_validate_replication,
isolate_instance_from_cluster,
remove_instance_isolation,
Expand All @@ -40,17 +40,13 @@
TIMEOUT = 40 * 60


@pytest.mark.group(1)
async def test_build_and_deploy(ops_test: OpsTest) -> None:
"""Simple test to ensure that the mysql and application charms get deployed."""
await high_availability_test_setup(ops_test)


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_kill_db_process(ops_test: OpsTest, continuous_writes) -> None:
async def test_kill_db_process(
ops_test: OpsTest, highly_available_cluster, continuous_writes
) -> None:
"""Test to send a SIGKILL to the primary db process and ensure that the cluster self heals."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

logger.info("Waiting until 3 mysql instances are online")
# ensure all units in the cluster are online
Expand Down Expand Up @@ -109,12 +105,14 @@ async def test_kill_db_process(ops_test: OpsTest, continuous_writes) -> None:
await clean_up_database_and_table(ops_test, database_name, table_name)


@pytest.mark.group(1)
@pytest.mark.group(2)
@pytest.mark.abort_on_fail
@pytest.mark.unstable
async def test_freeze_db_process(ops_test: OpsTest, continuous_writes) -> None:
async def test_freeze_db_process(
ops_test: OpsTest, highly_available_cluster, continuous_writes
) -> None:
"""Test to send a SIGSTOP to the primary db process and ensure that the cluster self heals."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

# ensure all units in the cluster are online
assert await ensure_n_online_mysql_members(
Expand Down Expand Up @@ -230,11 +228,13 @@ async def test_freeze_db_process(ops_test: OpsTest, continuous_writes) -> None:
await ensure_all_units_continuous_writes_incrementing(ops_test)


@pytest.mark.group(1)
@pytest.mark.group(3)
@pytest.mark.abort_on_fail
async def test_graceful_crash_of_primary(ops_test: OpsTest, continuous_writes) -> None:
async def test_graceful_crash_of_primary(
ops_test: OpsTest, highly_available_cluster, continuous_writes
) -> None:
"""Test to send SIGTERM to primary instance and then verify recovery."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

logger.info("Ensuring that there are 3 online mysql members")
assert await ensure_n_online_mysql_members(
Expand Down Expand Up @@ -293,13 +293,13 @@ async def test_graceful_crash_of_primary(ops_test: OpsTest, continuous_writes) -
await ensure_all_units_continuous_writes_incrementing(ops_test)


@pytest.mark.group(1)
@pytest.mark.group(4)
@pytest.mark.abort_on_fail
async def test_network_cut_affecting_an_instance(
ops_test: OpsTest, continuous_writes, chaos_mesh
ops_test: OpsTest, highly_available_cluster, continuous_writes, chaos_mesh
) -> None:
"""Test for a network cut affecting an instance."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

logger.info("Ensuring that there are 3 online mysql members")
assert await ensure_n_online_mysql_members(
Expand Down Expand Up @@ -377,12 +377,14 @@ async def test_network_cut_affecting_an_instance(
await ensure_all_units_continuous_writes_incrementing(ops_test)


@pytest.mark.group(1)
@pytest.mark.group(5)
@pytest.mark.abort_on_fail
@pytest.mark.unstable
async def test_graceful_full_cluster_crash_test(ops_test: OpsTest, continuous_writes) -> None:
async def test_graceful_full_cluster_crash_test(
ops_test: OpsTest, highly_available_cluster, continuous_writes
) -> None:
"""Test to send SIGTERM to all units and then ensure that the cluster recovers."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

logger.info("Ensure there are 3 online mysql members")
assert await ensure_n_online_mysql_members(
Expand Down Expand Up @@ -448,11 +450,11 @@ async def test_graceful_full_cluster_crash_test(ops_test: OpsTest, continuous_wr
await ensure_all_units_continuous_writes_incrementing(ops_test)


@pytest.mark.group(1)
@pytest.mark.group(6)
@pytest.mark.abort_on_fail
async def test_single_unit_pod_delete(ops_test: OpsTest) -> None:
async def test_single_unit_pod_delete(ops_test: OpsTest, highly_available_cluster) -> None:
"""Delete the pod in a single unit deployment and write data to new pod."""
mysql_application_name, _ = await high_availability_test_setup(ops_test)
mysql_application_name = get_application_name(ops_test, "mysql")

logger.info("Scale mysql application to 1 unit that is active")
async with ops_test.fast_forward("60s"):
Expand Down

0 comments on commit 0934aba

Please sign in to comment.