Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DPE-2277 Fixes for network partition test #272

Merged
merged 3 commits on
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions tests/integration/high_availability/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@ async def continuous_writes(ops_test: OpsTest) -> None:
await clear_writes_action.wait()


@pytest.fixture()
async def chaos_mesh(ops_test: OpsTest) -> None:
@pytest.fixture(scope="function")
paulomach marked this conversation as resolved.
Show resolved Hide resolved
def chaos_mesh(ops_test: OpsTest) -> None:
"""Deploys chaos mesh to the namespace and uninstalls it at the end."""
logger.info("Deploying chaos mesh")
deploy_chaos_mesh(ops_test.model.info.name)

yield
Expand Down
27 changes: 23 additions & 4 deletions tests/integration/high_availability/high_availability_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ def deploy_chaos_mesh(namespace: str) -> None:
"""
env = os.environ
env["KUBECONFIG"] = os.path.expanduser("~/.kube/config")
logger.info("Deploying Chaos Mesh")

subprocess.check_output(
" ".join(
Expand All @@ -253,6 +254,18 @@ def deploy_chaos_mesh(namespace: str) -> None:
shell=True,
env=env,
)
logger.info("Ensure chaos mesh is ready")
try:
for attempt in Retrying(stop=stop_after_delay(5 * 60), wait=wait_fixed(10)):
with attempt:
output = subprocess.check_output(
f"kubectl get pods --namespace {namespace} -l app.kubernetes.io/instance=chaos-mesh".split(),
env=env,
)
assert output.decode().count("Running") == 4, "Chaos Mesh not ready"

except RetryError:
raise Exception("Chaos Mesh pods not found")


def destroy_chaos_mesh(namespace: str) -> None:
Expand Down Expand Up @@ -493,7 +506,7 @@ async def ensure_all_units_continuous_writes_incrementing(

def isolate_instance_from_cluster(ops_test: OpsTest, unit_name: str) -> None:
"""Apply a NetworkChaos file to use chaos-mesh to simulate a network cut."""
with tempfile.NamedTemporaryFile() as temp_file:
with tempfile.NamedTemporaryFile(dir=os.getenv("HOME")) as temp_file:
with open(
"tests/integration/high_availability/manifests/chaos_network_loss.yml", "r"
) as chaos_network_loss_file:
Expand All @@ -508,9 +521,15 @@ def isolate_instance_from_cluster(ops_test: OpsTest, unit_name: str) -> None:

env = os.environ
env["KUBECONFIG"] = os.path.expanduser("~/.kube/config")
subprocess.check_output(
" ".join(["kubectl", "apply", "-f", temp_file.name]), shell=True, env=env
)

try:
subprocess.check_output(
" ".join(["kubectl", "apply", "-f", temp_file.name]), shell=True, env=env
paulomach marked this conversation as resolved.
Show resolved Hide resolved
)
except subprocess.CalledProcessError as e:
logger.error(e.output)
logger.error(e.stderr)
raise


def remove_instance_isolation(ops_test: OpsTest) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ destroy_chaos_mesh() {
timeout 30 kubectl delete "${i}" --all --all-namespaces || true
done

if kubectl -n "${chaos_mesh_ns}" get mutatingwebhookconfiguration | grep -q 'choas-mesh-mutation'; then
timeout 30 kubectl -n "${chaos_mesh_ns}" delete mutatingwebhookconfiguration chaos-mesh-mutation || true
if kubectl get mutatingwebhookconfiguration | grep -q 'chaos-mesh-mutation'; then
timeout 30 kubectl delete mutatingwebhookconfiguration chaos-mesh-mutation || true
fi

if kubectl -n "${chaos_mesh_ns}" get validatingwebhookconfiguration | grep -q 'chaos-mesh-validation'; then
timeout 30 kubectl -n "${chaos_mesh_ns}" delete validatingwebhookconfiguration chaos-mesh-validation || true
if kubectl get validatingwebhookconfiguration | grep -q 'chaos-mesh-validation-auth'; then
timeout 30 kubectl delete validatingwebhookconfiguration chaos-mesh-validation-auth || true
fi

if kubectl -n "${chaos_mesh_ns}" get validatingwebhookconfiguration | grep -q 'chaos-mesh-validate-auth'; then
timeout 30 kubectl -n "${chaos_mesh_ns}" delete validatingwebhookconfiguration chaos-mesh-validate-auth || true
if kubectl get validatingwebhookconfiguration | grep -q 'chaos-mesh-validation'; then
timeout 30 kubectl delete validatingwebhookconfiguration chaos-mesh-validation || true
fi

if kubectl get clusterrolebinding | grep -q 'chaos-mesh'; then
Expand Down