Skip to content

Commit

Permalink
jobs on stopping/stopped should delay exit on global shutdown (#421)
Browse files Browse the repository at this point in the history
  • Loading branch information
tgross authored Jun 30, 2017
1 parent bbe6bdd commit b3eadf7
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 21 deletions.
2 changes: 2 additions & 0 deletions docs/30-configuration/34-jobs.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Every job will emit events associated with the lifecycle of its process. Any job
- `stopping`: emitted when the job is asked to stop but before it does so. Useful when the job has a [stop timeout](#stop-timeout).
- `stopped`: emitted when the job is stopped. Note that this is not the same as the process exiting because a job might have many executions of its process.

Note that although `stopping` and `stopped` events are emitted for each running job when ContainerPilot is shutting down, any job that reacts to these events has only a limited window in which to execute. This window is 5 seconds, which leaves enough time for ContainerPilot to halt all jobs, gracefully shut down its own listeners, and exit within the default Docker shutdown timeout of 10 seconds. Once this window has elapsed, all processes receive a `SIGKILL` and are forced to exit immediately.

Additionally, jobs may react to these events:

- `startup`: published to all jobs when ContainerPilot is ready to start.
Expand Down
35 changes: 35 additions & 0 deletions integration_tests/tests/test_sigterm/containerpilot.json5
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// ContainerPilot configuration for the test_sigterm integration test:
// one main "app" job plus two jobs that react to the app job's
// `stopping` and `stopped` lifecycle events.
{
consul: "consul:8500",
logging: {
level: "DEBUG",
format: "text"
},
jobs: [
{
// Main job under test: a placeholder process (`sleep 60`) whose
// health check command (`true`) always succeeds.
name: "app",
port: 8000,
exec: "sleep 60",
health: {
exec: "true",
interval: 1,
ttl: 5,
},
},
{
// Runs once when "app" emits `stopping`; run.sh greps the app
// container's logs for this message.
name: "preStop",
exec: "echo 'preStop fired on app stopping'",
when: {
source: "app",
once: "stopping"
},
},
{
// Runs once when "app" emits `stopped`; run.sh greps the app
// container's logs for this message.
name: "postStop",
exec: "echo 'postStop fired on app stopped'",
when: {
source: "app",
once: "stopped"
},
}
]
}
3 changes: 2 additions & 1 deletion integration_tests/tests/test_sigterm/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ services:

app:
image: "cpfix_app"
mem_limit: 512m
mem_limit: 128m
links:
- consul:consul
volumes:
- './containerpilot.json5:/etc/containerpilot.json5'
- '${CONTAINERPILOT_BIN}:/bin/containerpilot:ro'

test:
Expand Down
42 changes: 23 additions & 19 deletions integration_tests/tests/test_sigterm/run.sh
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
#!/bin/bash

docker-compose up -d consul app > /dev/null 2>&1
set -e

# EXIT trap handler. Captures the script's exit status and, when it is
# non-zero, prints the app container's logs (also saved to app.log) to
# help diagnose the failure, then exits with that same status.
#
# Because the script runs under `set -e`, this handler can fire before
# APP_ID has been assigned (for example when `docker-compose up` or the
# consul probe fails). In that case there is no container to inspect, so
# say so instead of calling `docker logs` with an empty ID.
function finish {
    # Must be the first statement so that $? is still the script's status.
    local result=$?
    if [ "$result" -ne 0 ]; then
        echo '----- APP LOGS ------'
        if [ -n "${APP_ID:-}" ]; then
            docker logs "$APP_ID" | tee app.log
        else
            echo 'app container ID not yet known; no logs available'
        fi
        echo '---------------------'
    fi
    exit "$result"
}

trap finish EXIT

docker-compose up -d consul app

# Wait for consul to elect a leader
docker-compose run --no-deps test /go/bin/test_probe test_consul > /dev/null 2>&1
if [ ! $? -eq 0 ] ; then exit 1 ; fi
docker-compose run --no-deps test /go/bin/test_probe test_consul

APP_ID="$(docker-compose ps -q app)"
docker-compose run --no-deps test /go/bin/test_probe test_sigterm "$APP_ID" > /dev/null 2>&1
result=$?

CONSUL_ID="$(docker-compose ps -q consul)"
TEST_ID=$(docker ps -l -f "ancestor=cpfix_test_probe" --format="{{.ID}}")

if [ $result -ne 0 ]; then
echo '----- TEST LOGS ------'
docker logs "$TEST_ID" | tee test.log
echo '----- APP LOGS ------'
docker logs "$APP_ID" | tee app.log
echo '----- CONSUL LOGS ------'
docker logs "$CONSUL_ID" | tee consul.log
echo '---------------------'
fi
exit $result
docker-compose run --no-deps test /go/bin/test_probe test_sigterm "$APP_ID"

# verify preStop fired
docker logs "$APP_ID" | grep "msg=\"'preStop fired on app stopping"

# verify postStop fired
docker logs "$APP_ID" | grep "msg=\"'postStop fired on app stopped"
10 changes: 9 additions & 1 deletion jobs/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ func (job *Job) Run(bus *events.EventBus) {
}

func (job *Job) processEvent(ctx context.Context, event events.Event) bool {

runEverySource := fmt.Sprintf("%s.run-every", job.Name)
heartbeatSource := fmt.Sprintf("%s.heartbeat", job.Name)
startTimeoutSource := fmt.Sprintf("%s.wait-timeout", job.Name)
Expand Down Expand Up @@ -258,6 +257,15 @@ func (job *Job) processEvent(ctx context.Context, event events.Event) bool {
events.Event{events.Quit, job.Name},
events.QuitByClose,
events.GlobalShutdown:
if (job.startEvent.Code == events.Stopping ||
job.startEvent.Code == events.Stopped) &&
job.exec != nil {
// "pre-stop" and "post-stop" style jobs ignore the global
// shutdown and return on their own ExitSuccess/ExitFailed instead.
// If the stop timeout on the global shutdown is exceeded,
// the whole process gets SIGKILL.
break
}
return true
case events.GlobalEnterMaintenance:
job.MarkForMaintenance()
Expand Down

0 comments on commit b3eadf7

Please sign in to comment.