Skip to content
This repository has been archived by the owner on Oct 23, 2024. It is now read-only.

Commit

Permalink
External Volumes Integration Tests for Hello-World and Cassandra. (#3322)
Browse files Browse the repository at this point in the history

SDK:
* Add improved logging to indicate Pod-Replacement policy has been used.

Hello-World:
* Add scenario with external-volumes and basic integration test.

Cassandra:
* Add flag to enable/disable Pod-Replacement failure policy.
* Remove mentions of Portworx from service spec, switch NetApp to Generic Driver.
* Add basic integration test.
  • Loading branch information
kaiwalyajoshi authored Nov 10, 2020
1 parent c53d4fe commit 6982478
Show file tree
Hide file tree
Showing 18 changed files with 476 additions and 87 deletions.
2 changes: 1 addition & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def configure_universe(tmpdir_factory):

@pytest.fixture(scope="session", autouse=True)
def configure_external_volumes():
if is_env_var_set("ENABLE_EXTERNAL_VOLUMES", default=""):
if is_env_var_set("ENABLE_EXTERNAL_VOLUMES", default=str(False)):
yield from sdk_external_volumes.external_volumes_session()
else:
yield
Expand Down
2 changes: 1 addition & 1 deletion frameworks/cassandra/src/main/dist/svc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pods:
type: DOCKER
container-path: container-path
driver-name: {{CASSANDRA_EXTERNAL_VOLUME_DRIVER_NAME}}
driver-options: '{{{CASSANDRA_EXTERNAL_VOLUME_PORTWORX_OPTIONS}}}'
driver-options: '{{{CASSANDRA_EXTERNAL_VOLUME_DRIVER_OPTIONS}}}'
{{#CASSANDRA_EXTERNAL_VOLUME_NAME}}
volume-name: {{CASSANDRA_EXTERNAL_VOLUME_NAME}}
{{/CASSANDRA_EXTERNAL_VOLUME_NAME}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.mesosphere.sdk.cassandra.api.SeedsResource;
import com.mesosphere.sdk.config.validate.TaskEnvCannotChange;
import com.mesosphere.sdk.framework.EnvStore;
import com.mesosphere.sdk.scheduler.DefaultScheduler;
import com.mesosphere.sdk.scheduler.SchedulerBuilder;
import com.mesosphere.sdk.scheduler.SchedulerConfig;
Expand Down Expand Up @@ -32,18 +33,19 @@ public final class Main {
private Main() {}

public static void main(String[] args) throws Exception {
final EnvStore envStore = EnvStore.fromEnv();
if (args.length != 1) {
throw new IllegalArgumentException(
"Expected one file argument, got: " + Arrays.toString(args)
);
}
SchedulerRunner
.fromSchedulerBuilder(createSchedulerBuilder(new File(args[0])))
.fromSchedulerBuilder(createSchedulerBuilder(new File(args[0]), envStore))
.run();
}

@SuppressWarnings("checkstyle:MultipleStringLiterals")
private static SchedulerBuilder createSchedulerBuilder(File yamlSpecFile) throws Exception {
private static SchedulerBuilder createSchedulerBuilder(File yamlSpecFile, EnvStore envStore) throws Exception {
SchedulerConfig schedulerConfig = SchedulerConfig.fromEnv();
RawServiceSpec rawServiceSpec = RawServiceSpec.newBuilder(yamlSpecFile).build();
List<String> localSeeds = CassandraSeedUtils
Expand All @@ -64,7 +66,7 @@ private static SchedulerBuilder createSchedulerBuilder(File yamlSpecFile) throws
}

DefaultServiceSpec serviceSpec = DefaultServiceSpec.newBuilder(serviceSpecGenerator.build())
.replacementFailurePolicy(getReplacementFailurePolicy())
.replacementFailurePolicy(getReplacementFailurePolicy(envStore))
.build();

return DefaultScheduler.newBuilder(serviceSpec, schedulerConfig)
Expand All @@ -82,13 +84,15 @@ private static SchedulerBuilder createSchedulerBuilder(File yamlSpecFile) throws
.withSingleRegionConstraint();
}

private static ReplacementFailurePolicy getReplacementFailurePolicy() throws Exception {
return ReplacementFailurePolicy.newBuilder()
.permanentFailureTimoutSecs(
Integer.valueOf(System.getenv("PERMANENT_FAILURE_TIMEOUT_SECS")))
.minReplaceDelaySecs(
Integer.valueOf(System.getenv("MIN_REPLACE_DELAY_SECS")))
.build();
private static ReplacementFailurePolicy getReplacementFailurePolicy(EnvStore envStore) throws Exception {
if (envStore.getOptionalBoolean("ENABLE_AUTOMATIC_POD_REPLACEMENT", false)) {
return ReplacementFailurePolicy.newBuilder()
.permanentFailureTimoutSecs(Integer.valueOf(System.getenv("PERMANENT_FAILURE_TIMEOUT_SECS")))
.minReplaceDelaySecs(Integer.valueOf(System.getenv("MIN_REPLACE_DELAY_SECS")))
.build();
} else {
return null;
}
}

private static Collection<Object> getResources(List<String> localSeeds) {
Expand Down
7 changes: 7 additions & 0 deletions frameworks/cassandra/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from typing import Iterator

import pytest
import sdk_external_volumes
import sdk_security
from tests import config


@pytest.fixture(scope="session")
def configure_security(configure_universe: None) -> Iterator[None]:
    # Session-scoped setup/teardown of service-account security for this service.
    # security_session is a generator: everything before its yield runs as setup,
    # everything after runs as teardown once the test session finishes.
    # Depends on configure_universe so the universe is configured first.
    yield from sdk_security.security_session(config.SERVICE_NAME)


@pytest.fixture(scope="session")
def configure_external_volumes() -> Iterator[None]:
    # Handle creation of external volumes.
    # Session-scoped: external_volumes_session is a generator that provisions the
    # volume driver on the cluster before tests and cleans up after the session.
    # NOTE(review): unlike the root conftest, this is not gated on an env var,
    # so it always runs for this framework's test session.
    yield from sdk_external_volumes.external_volumes_session()
78 changes: 78 additions & 0 deletions frameworks/cassandra/tests/test_external_volumes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging
import pytest
import re

import sdk_agents
import sdk_install
import sdk_plan
import sdk_tasks
from tests import config

log = logging.getLogger(__name__)


@pytest.fixture(scope="module", autouse=True)
def configure_package(configure_security):
    # Module-scoped, autouse: guarantees every test in this module starts from a
    # clean slate. Uninstall before yielding (in case a previous run left the
    # service behind) and again in finally so cleanup happens even if a test
    # errors mid-module. Individual tests perform their own installs.
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)


@pytest.mark.external_volumes
@pytest.mark.sanity
@pytest.mark.dcos_min_version("2.1")
def test_default_deployment():
    """Install the service with external volumes enabled and verify it deploys."""
    external_volume_options = {"nodes": {"external_volume": {"enabled": True}}}

    # Default installation, waiting for all 3 nodes to come up.
    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        3,
        additional_options=external_volume_options,
        wait_for_deployment=True,
    )

    # The scheduler restarts after install; wait for the deploy plan to finish.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)


@pytest.mark.skip(reason="Conflicts with Cassandra Custom Recovery Manager")
@pytest.mark.sanity
def test_auto_replace_on_drain():
    """Drain an agent hosting node tasks and verify they are replaced on other agents."""
    node_pattern = re.compile("^node-[0-9]+-server$")
    candidate_tasks = sdk_tasks.get_tasks_avoiding_scheduler(config.SERVICE_NAME, node_pattern)

    assert len(candidate_tasks) != 0, "Could not find a node to drain"

    # Drain the agent that hosts the first candidate task; every task on that
    # agent should be replaced.
    drained_agent_id = candidate_tasks[0].agent_id
    replace_tasks = [t for t in candidate_tasks if t.agent_id == drained_agent_id]
    log.info(
        "Tasks on agent {} to be replaced after drain: {}".format(drained_agent_id, replace_tasks)
    )
    sdk_agents.drain_agent(drained_agent_id)

    # Recovery should kick off automatically and run to completion.
    sdk_plan.wait_for_kicked_off_recovery(config.SERVICE_NAME)
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)

    post_recovery_tasks = sdk_tasks.get_summary()

    for old_task in replace_tasks:
        # Same task name but a new task id == the replacement instance.
        replacements = [
            t for t in post_recovery_tasks if t.name == old_task.name and t.id != old_task.id
        ]
        new_task = replacements[0]
        log.info(
            "Checking affected task has moved to a new agent:\n"
            "old={}\nnew={}".format(old_task, new_task)
        )
        assert old_task.agent_id != new_task.agent_id

    # Reactivate the drained agent, otherwise uninstall plans will be halted for portworx
    sdk_agents.reactivate_agent(drained_agent_id)
41 changes: 26 additions & 15 deletions frameworks/cassandra/universe/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,6 @@
"type": "string",
"default": ""
},
"permanent-failure-timeout-secs": {
"type": "integer",
"description": "Time in seconds to wait before declaring a task as permanently failed.",
"default": 120
},
"min-replace-delay-secs": {
"type": "integer",
"description": "Time to wait between destructive task recoveries.",
"default": 240
},
"log_level": {
"description": "The log level for the DC/OS service.",
"type": "string",
Expand Down Expand Up @@ -441,6 +431,27 @@
"minimum": 15
}
}
},
"pod-replacement-failure-policy": {
"description": "Options relating to automatic pod-replacement failure policies.",
"type": "object",
"properties": {
"enable-automatic-pod-replacement": {
"description": "Determines whether pods should be replaced automatically on failure.",
"type": "boolean",
"default": false
},
"permanent-failure-timeout-secs": {
"description": "Default time to wait before declaring a pod as permanently failed in seconds.",
"type": "integer",
"default": 120
},
"min-replace-delay-secs": {
"description": "Default time to wait between successive pod-replace operations in seconds.",
"type": "integer",
"default": 240
}
}
}
},
"required": [
Expand Down Expand Up @@ -495,18 +506,18 @@
}
},
"external_volume": {
"description": "Cassandra external volume configuration.",
"type": "object",
"description": "The Cassandra external volume configuration.\nOnly Portworx external volumes are supported.",
"properties": {
"enabled": {
"type": "boolean",
"description": "If true, external profile will be used.",
"description": "If true, external volumes will be used.",
"default": false
},
"portworx_volume_options": {
"driver_options": {
"type": "string",
"default": "size=10",
"description": "Volume options."
"description": "External Volume storage provider options."
},
"volume_name": {
"type": "string",
Expand All @@ -515,7 +526,7 @@
},
"driver_name": {
"type": "string",
"description": "Docker volume driver name.",
"description": "External Volume storage provider to use.",
"default": "pxd"
}
}
Expand Down
10 changes: 7 additions & 3 deletions frameworks/cassandra/universe/marathon.json.mustache
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@
"FRAMEWORK_LOG_LEVEL": "{{service.log_level}}",
"CASSANDRA_VERSION": "3.11.6",
"S3CLI_VERSION": "s3cli-0.0.55-linux-amd64",
"PERMANENT_FAILURE_TIMEOUT_SECS": "{{service.permanent-failure-timeout-secs}}",
"MIN_REPLACE_DELAY_SECS": "{{service.min-replace-delay-secs}}",
{{#service.service_account_secret}}
"DCOS_SERVICE_ACCOUNT_CREDENTIAL": "secrets/service-account.json",
Expand Down Expand Up @@ -112,10 +110,16 @@
{{#nodes.volume_profile}}
"CASSANDRA_VOLUME_PROFILE": "{{nodes.volume_profile}}",
{{/nodes.volume_profile}}

"CASSANDRA_EXTERNAL_VOLUME_ENABLED" : "{{nodes.external_volume.enabled}}",
"CASSANDRA_EXTERNAL_VOLUME_PORTWORX_OPTIONS" : "{{nodes.external_volume.portworx_volume_options}}",
"CASSANDRA_EXTERNAL_VOLUME_DRIVER_OPTIONS" : "{{nodes.external_volume.driver_options}}",
"CASSANDRA_EXTERNAL_VOLUME_NAME" : "{{nodes.external_volume.volume_name}}",
"CASSANDRA_EXTERNAL_VOLUME_DRIVER_NAME" : "{{nodes.external_volume.driver_name}}",

"ENABLE_AUTOMATIC_POD_REPLACEMENT": "{{service.pod-replacement-failure-policy.enable-automatic-pod-replacement}}",
"PERMANENT_FAILURE_TIMEOUT_SECS": "{{service.pod-replacement-failure-policy.permanent-failure-timeout-secs}}",
"MIN_REPLACE_DELAY_SECS": "{{service.pod-replacement-failure-policy.min-replace-delay-secs}}",

"TASKCFG_ALL_CASSANDRA_HEAP_SIZE_MB": "{{nodes.heap.size}}",
"TASKCFG_ALL_CASSANDRA_HEAP_NEW_MB": "{{nodes.heap.new}}",
"CASSANDRA_JAVA_URI": "{{resource.assets.uris.cassandra-jre-tar-gz}}",
Expand Down
82 changes: 82 additions & 0 deletions frameworks/helloworld/src/main/dist/external-volumes.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Hello-world service spec exercising external DOCKER volumes for both pods.
# {{...}} placeholders are mustache templates resolved from the scheduler env.
# NOTE(review): indentation was lost in this capture; nesting below follows the
# standard dcos-commons svc.yml layout — confirm against the original file.
name: {{FRAMEWORK_NAME}}
scheduler:
  principal: {{FRAMEWORK_PRINCIPAL}}
  user: {{FRAMEWORK_USER}}
pods:
  hello:
    count: {{HELLO_COUNT}}
    placement: '{{{HELLO_PLACEMENT}}}'
    # External volume mounted at hello-container-path inside the task container.
    external-volumes:
      hello-volume:
        type: DOCKER
        volume-mode: RW
        container-path: hello-container-path
        driver-name: {{EXTERNAL_VOLUME_DRIVER_NAME}}
        driver-options: {{EXTERNAL_VOLUME_DRIVER_OPTIONS}}
        # volume-name is only emitted when the env var is set (mustache section).
        {{#HELLO_EXTERNAL_VOLUME_NAME}}
        volume-name: {{HELLO_EXTERNAL_VOLUME_NAME}}
        {{/HELLO_EXTERNAL_VOLUME_NAME}}
    tasks:
      server:
        goal: RUNNING
        # Writes to the external volume so the health check can verify it is mounted.
        cmd: env && echo hello >> hello-container-path/output && sleep $SLEEP_DURATION
        cpus: {{HELLO_CPUS}}
        memory: {{HELLO_MEM}}
        env:
          SLEEP_DURATION: {{SLEEP_DURATION}}
        health-check:
          # Healthy once the output file exists on the mounted volume.
          cmd: stat hello-container-path/output
          interval: 5
          grace-period: 30
          delay: 0
          timeout: 10
          max-consecutive-failures: 3
        labels: {{HELLO_LABELS}}
  world:
    count: {{WORLD_COUNT}}
    allow-decommission: true
    placement: '{{{WORLD_PLACEMENT}}}'
    external-volumes:
      world-volume:
        type: DOCKER
        volume-mode: RW
        container-path: world-container-path
        driver-name: {{EXTERNAL_VOLUME_DRIVER_NAME}}
        driver-options: {{EXTERNAL_VOLUME_DRIVER_OPTIONS}}
        {{#WORLD_EXTERNAL_VOLUME_NAME}}
        volume-name: {{WORLD_EXTERNAL_VOLUME_NAME}}
        {{/WORLD_EXTERNAL_VOLUME_NAME}}
    tasks:
      server:
        goal: RUNNING
        cmd: |
          # for graceful shutdown
          # trap SIGTERM and mock a cleanup timeframe
          terminated () {
            echo "$(date) received SIGTERM, zzz for 3 ..."
            sleep 3
            echo "$(date) ... all clean, peace out"
            exit 0
          }
          trap terminated SIGTERM
          echo "$(date) trapping SIGTERM, watch here for the signal..."
          echo "${TASK_NAME}" >>world-container-path/output &&
          # instead of running for a short duration (equal to SLEEP_DURATION), run infinitely
          # to allow for testing of SIGTERM..grace..SIGKILL
          while true; do
            sleep 0.1
          done
        cpus: {{WORLD_CPUS}}
        memory: {{WORLD_MEM}}
        env:
          SLEEP_DURATION: {{SLEEP_DURATION}}
        readiness-check:
          # wordcount (wc) will report an error if the file does not exist, which effectively is zero (0) bytes
          # so send the error to /dev/null, BUT also zero-left-pad the variable BYTES to ensure that it is zero
          # on empty for comparison sake.
          cmd: BYTES="$(wc -c world-container-path/output 2>/dev/null| awk '{print $1;}')" && [ 0$BYTES -gt 0 ]
          interval: {{WORLD_READINESS_CHECK_INTERVAL}}
          delay: {{WORLD_READINESS_CHECK_DELAY}}
          timeout: {{WORLD_READINESS_CHECK_TIMEOUT}}
        kill-grace-period: {{WORLD_KILL_GRACE_PERIOD}}
4 changes: 2 additions & 2 deletions frameworks/helloworld/src/main/dist/multiport.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ pods:
- {{BOOTSTRAP_URI}}
resource-sets:
multi-port-resources:
cpus: {{HELLO_CPUS}}
memory: {{HELLO_MEM}}
ports:
port_one:
port: {{HELLO_PORT_ONE}}
Expand Down Expand Up @@ -51,7 +53,5 @@ pods:
sum=$(($exit_1+$exit_2))
echo "exit codes : ${exit_1} ${exit_2} and sum : ${sum}"
exit $sum
cpus: {{HELLO_CPUS}}
memory: {{HELLO_MEM}}
env:
HELLO_PORT_ONE: {{HELLO_PORT_ONE}}
Loading

0 comments on commit 6982478

Please sign in to comment.