Skip to content

Commit e74e138

Browse files
committed
Refactor worker restart to prevent issues with periodic tests
1 parent ad33b86 commit e74e138

File tree

4 files changed

+41
-4
lines changed

4 files changed

+41
-4
lines changed
Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
#!/bin/sh
22
set -e
3-
# find nassl worker and restart the container(s)
4-
docker ps --filter label=com.docker.compose.service=worker-nassl --quiet | xargs --no-run-if-empty docker restart
3+
# stop and start worker one at a time to ensure (batch) tasks are still being picked up
4+
# workers are sent a TERM signal, followed by a 10 minute grace period before KILL is sent
5+
for worker in $(docker ps --filter label=com.docker.compose.service=worker-nassl --quiet); do
6+
docker stop "$worker"
7+
docker start "$worker"
8+
done
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/sh
2+
3+
# restart slow worker every day to prevent slow memory leaks
4+
# as the slow worker can run very long tasks (eg: report generation)
5+
# we first start a new container before stopping the previous one
6+
7+
set -e
8+
9+
cd /opt/Internet.nl
10+
11+
SERVICE=worker-slow
12+
REPLICAS=$WORKER_SLOW_REPLICAS
13+
COMPOSE_CMD="docker compose --env-file=docker/defaults.env --env-file=docker/host.env --env-file=docker/local.env"
14+
15+
OLD_CONTAINERS=$($COMPOSE_CMD ps --format "{{ .Name }}"|grep "$SERVICE")
16+
17+
# bring up new containers, wait until healthy
18+
$COMPOSE_CMD up --no-deps --no-recreate --wait --scale="$SERVICE=$(($REPLICAS*2))" "$SERVICE"
19+
20+
# graceful shutdown and remove old containers
21+
# OLD_CONTAINERS holds one container name per line; it is deliberately left
# unquoted so every name becomes a separate argument (quoted, multiple names
# would be passed as one single, non-existent container name)
# stop first so the workers receive the stop signal and their grace period,
# `docker rm --force` would SIGKILL them immediately
# shellcheck disable=SC2086
docker stop $OLD_CONTAINERS
# shellcheck disable=SC2086
docker rm $OLD_CONTAINERS
22+
23+
# restore replica number to original
24+
# COMPOSE_CMD is deliberately unquoted (command plus its flags), the
# service=replicas argument is quoted to keep it a single word
$COMPOSE_CMD scale "$SERVICE=$REPLICAS"
Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
#!/bin/sh
22
set -e
3-
# find worker and restart the container(s)
4-
docker ps --filter label=com.docker.compose.service=worker --quiet | xargs --no-run-if-empty docker restart
3+
# stop and start worker one at a time to ensure (batch) tasks are still being picked up
4+
# workers are sent a TERM signal, followed by a 10 minute grace period before KILL is sent
5+
for worker in $(docker ps --filter label=com.docker.compose.service=worker --quiet); do
6+
docker stop "$worker"
7+
docker start "$worker"
8+
done

docker/docker-compose.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,8 @@ services:
249249
# time after which a SIGKILL is sent to celery after a SIGTERM (warm shutdown), default 10s
250250
# a grace period that is too short causes issues on batch when tasks are killed during the hourly worker restart
251251
stop_grace_period: 10m
252+
# SIGTERM is default, but make it explicit
253+
stop_signal: SIGTERM
252254

253255
depends_on:
254256
db-migrate:
@@ -735,6 +737,9 @@ services:
735737
command: crond -f -d7 -c /etc/crontabs-docker
736738
environment:
737739
- AUTO_UPDATE_TO
740+
- WORKER_SLOW_REPLICAS
741+
- WORKER_REPLICAS
742+
- RELEASE
738743

739744
restart: unless-stopped
740745
logging:

0 commit comments

Comments
 (0)