forked from keycloak/keycloak-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
90f6dca
commit 1b94694
Showing
5 changed files
with
228 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#!/bin/bash | ||
# Script simulating different xsite failover scenarios | ||
#
# Environment variables read by this script:
#   DOMAIN          required; domain whose client., primary. and backup. records are resolved
#   FAILOVER_MODE   HEALTH_PROBE, ALL_ROUTES or CLUSTER_FAIL
#   RECOVERY_MODE   ACTIVE or PASSIVE (at least one of FAILOVER_MODE / RECOVERY_MODE must be set)
#   PROJECT         namespace passed to `kubectl -n`; defaults to runner-keycloak
#   FAILOVER_DELAY  seconds to sleep before the failover is triggered; defaults to 60
#   RUNNER_DEBUG    set to 1 to trace every executed command
#
# Exit as soon as a command outside of a condition returns a non-zero status.
set -e | ||
|
||
# Enable command tracing only when RUNNER_DEBUG is exactly "1".
if [[ "$RUNNER_DEBUG" == "1" ]]; then | ||
set -x | ||
fi | ||
|
||
# Reports whether DNS has failed over from the primary to the backup site.
# Arguments: $1 - domain hosting the client., primary. and backup. records
# Returns:   0 when client.<domain> resolves to a non-empty address set that
#            equals backup.<domain> and differs from primary.<domain>;
#            non-zero otherwise.
function activeClusterDown() {
  # Locals keep the caller's DOMAIN and any global *_IPS values untouched.
  local domain=$1
  local client_ips primary_ips backup_ips

  # Sort so the comparison does not depend on the order dig returns records in.
  client_ips=$(dig +short "client.${domain}" | sort)
  primary_ips=$(dig +short "primary.${domain}" | sort)
  backup_ips=$(dig +short "backup.${domain}" | sort)

  # An empty client answer means the lookup returned nothing; it must not be
  # mistaken for "client now matches backup" when the backup lookup is empty too.
  [[ -n "${client_ips}" \
    && "${client_ips}" == "${backup_ips}" \
    && "${client_ips}" != "${primary_ips}" ]]
}
|
||
# Removes the Keycloak aws-health-route so that Route53 will eventually failover
# Globals: PROJECT (read) - namespace the route is deleted from
# Returns: always 0; a failing delete is deliberately ignored (best effort)
function killHealthRoute() {
  kubectl -n "${PROJECT}" delete route aws-health-route || true
}
|
||
# Remove all Keycloak routes so that Route53 will failover from the active to the passive cluster and the old DNS ips will fail
# Globals: PROJECT (read) - namespace holding the Keycloak resources
function killKeycloakRoutes() {
  # Scale the operator down first, before the ingress is deleted.
  kubectl -n "${PROJECT}" scale --replicas=0 deployment/keycloak-operator
  kubectl -n "${PROJECT}" rollout status --watch --timeout=600s statefulset/keycloak
  # Best effort: a failing ingress delete is ignored.
  kubectl -n "${PROJECT}" delete ingress keycloak-ingress || true
  killHealthRoute
}
|
||
# Delete the Keycloak + Infinispan pods to simulate cluster crash
# Globals: PROJECT (read) - namespace holding the Keycloak and Infinispan workloads
function killKeycloakCluster() {
  # Scale both operators to zero before deleting the pods and StatefulSets.
  kubectl -n openshift-operators scale --replicas=0 deployment/infinispan-operator-controller-manager
  kubectl -n "${PROJECT}" scale --replicas=0 deployment/keycloak-operator
  # Force-delete every pod in the namespace with no grace period.
  kubectl -n "${PROJECT}" delete pods --all --force --grace-period=0
  kubectl -n "${PROJECT}" delete statefulset --all
}
|
||
# Scale Infinispan and Keycloak Operators so that the original cluster is recreated
# Globals:   PROJECT (read)       - namespace holding the Keycloak resources
#            DOMAIN (read)        - domain appended to the site subdomain
#            RECOVERY_MODE (read) - only used in the log message
# Arguments: $1 - site subdomain used as the route host prefix (e.g. primary, backup)
# Note:      this function does not return; it terminates the script with the
#            status of the final rollout check.
function reviveKeycloakCluster() {
  local site=${1:?site subdomain is required}

  printf '\033[0;31mINFO:%s Running Recovery scenario - %s\033[0m\n' \
    "$(date '+%F-%T-%Z')" "${RECOVERY_MODE}"

  # Recreate the health route. The manifest is fed on stdin; the here-doc is
  # unquoted on purpose so that ${site} and ${DOMAIN} are expanded.
  kubectl -n "${PROJECT}" apply -f - << EOF
apiVersion: route.openshift.io/v1
kind: Route
metadata:
  name: aws-health-route
spec:
  host: "${site}.${DOMAIN}"
  port:
    targetPort: https
  tls:
    insecureEdgeTerminationPolicy: Redirect
    termination: passthrough
  to:
    kind: Service
    name: keycloak-service
EOF

  # Bring both operators back, then wait for the StatefulSets to roll out.
  kubectl -n openshift-operators scale --replicas=1 deployment/infinispan-operator-controller-manager
  kubectl -n "${PROJECT}" scale --replicas=1 deployment/keycloak-operator
  kubectl -n "${PROJECT}" rollout status --watch --timeout=600s statefulset/infinispan
  kubectl -n "${PROJECT}" rollout status --watch --timeout=600s statefulset/keycloak

  # End the script here so the caller's remaining statements do not run.
  exit
}
|
||
# Blocks until activeClusterDown succeeds for ${DOMAIN}, then logs how long it took.
# Globals: DOMAIN (read) - domain handed to activeClusterDown
# Outputs: one INFO line with the elapsed whole seconds on stdout
# Note:    there is no timeout; the loop polls every 0.1s until failover is seen.
function waitForFailover() {
  local start end elapsed

  start=$(date +%s)
  until activeClusterDown "${DOMAIN}"; do
    sleep 0.1
  done
  end=$(date +%s)
  elapsed=$(( end - start ))

  printf '\033[0;31mINFO:%s Route53 took %s seconds to failover\033[0m\n' \
    "$(date '+%F-%T-%Z')" "${elapsed}"
}
|
||
# Thin wrapper that only calls killKeycloakCluster.
# NOTE(review): no caller is visible in this script - confirm it is still needed.
function clusterFailover() { | ||
killKeycloakCluster | ||
} | ||
|
||
# Defaults for the optional settings.
: "${PROJECT:=runner-keycloak}"
: "${FAILOVER_DELAY:=60}"

if [[ -z "${RECOVERY_MODE}" && -z "${FAILOVER_MODE}" ]]; then
  echo "RECOVERY_MODE or FAILOVER_MODE env must be defined"
  exit 1
fi

if [[ -z "${DOMAIN}" ]]; then
  echo "DOMAIN env must be defined"
  exit 1
fi

# Recovery takes precedence over failover when RECOVERY_MODE is set.
if [[ -n "${RECOVERY_MODE}" ]]; then
  case "${RECOVERY_MODE^^}" in
    ACTIVE)
      reviveKeycloakCluster primary
      ;;
    PASSIVE)
      reviveKeycloakCluster backup
      ;;
    *)
      echo "Unknown RECOVERY_MODE=${RECOVERY_MODE}"
      exit 1
      ;;
  esac
  # reviveKeycloakCluster ends the script itself; the explicit exit makes sure
  # a recovery run can never fall through into the failover steps below.
  exit 0
fi

# Reject an unsupported mode up front. Otherwise nothing would be killed and
# waitForFailover would poll DNS indefinitely.
case "${FAILOVER_MODE^^}" in
  HEALTH_PROBE | ALL_ROUTES | CLUSTER_FAIL) ;;
  *)
    echo "Unknown FAILOVER_MODE=${FAILOVER_MODE}"
    exit 1
    ;;
esac

echo -e "\033[0;31mINFO:$(date '+%F-%T-%Z') Entering Failover mode, with an initial delay of ${FAILOVER_DELAY} seconds\033[0m"
sleep "${FAILOVER_DELAY}"
echo -e "\033[0;31mINFO:$(date '+%F-%T-%Z') Running Failover scenario - ${FAILOVER_MODE}\033[0m"

case "${FAILOVER_MODE^^}" in
  HEALTH_PROBE)
    killHealthRoute
    ;;
  ALL_ROUTES)
    killKeycloakRoutes
    ;;
  CLUSTER_FAIL)
    killKeycloakCluster
    ;;
esac

waitForFailover
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
= Simulate Keycloak Site Failover | ||
:description: How to automate the simulation of Keycloak site failure. | ||
|
||
== Prerequisites: | ||
|
||
* A Keycloak instance replicated across two OpenShift clusters with Infinispan xsite and an Aurora DB
* Realm, user and client exist with the values required by the benchmark CLI command | ||
|
||
== Running the failure test from the CLI | ||
|
||
=== Preparations | ||
|
||
* Extract the `+keycloak-benchmark-${version}.[zip|tar.gz]+` file | ||
* xref:benchmark-guide::preparing-keycloak.adoc[] | ||
* Make sure your local KUBECONFIG is set to the OpenShift cluster which you want to fail.
|
||
=== Parameters | ||
|
||
The failover script requires the following env variables to be set: `FAILOVER_MODE` and `DOMAIN`.
|
||
The `FAILOVER_MODE` determines the type of failover that is initiated by the script and can be one of the following values: | ||
[cols='1,3'] | ||
|=== | ||
| FAILOVER_MODE | Description | ||
|
||
| [.nowrap]`HEALTH_PROBE` | ||
| Deletes the Keycloak aws-health-route so that Route53 will eventually failover. | ||
|
||
| [.nowrap]`ALL_ROUTES` | ||
| Deletes all Keycloak routes so that Route53 will eventually failover, but requests to the old DNS IP addresses will fail. | ||
The Keycloak Operator is scaled down to 0 pods to prevent the Keycloak Ingress from being recreated. | ||
|
||
| [.nowrap]`CLUSTER_FAIL` | ||
| Deletes all Keycloak and Infinispan pods with no grace period and removes the associated StatefulSets. Both operators are
scaled down to prevent the removed resources from being recreated. | ||
|=== | ||
|
||
See below for a description of the other environment variables that can be configured. | ||
|
||
`DOMAIN` :: *Required*. The Route53 domain hosting the `client.`, `primary.` and `backup.` subdomains. | ||
|
||
`FAILOVER_DELAY` :: *Optional*. The delay in seconds to wait before initiating cluster failover. Defaults to 60 seconds. | ||
|
||
=== Execution | ||
|
||
Use the xref:benchmark-guide::run/running-benchmark-cli.adoc[] guide to simulate load against a specific Kubernetes environment. | ||
|
||
In parallel, execute the command below to initiate failover:
|
||
[source,bash] | ||
---- | ||
FAILOVER_MODE="ALL_ROUTES" DOMAIN=... ./kc-failover.sh | ||
---- | ||
|
||
NOTE: In order for the `kc-failover.sh` script to accurately record the time taken for Route53 failover to occur, it's | ||
recommended that the script is executed in the same environment as the Keycloak benchmark scenario. | ||
|
||
== Restoring clusters after failover tests | ||
Once a failover benchmark has been executed, it's possible to restore the original cluster state by executing the script | ||
with the `RECOVERY_MODE` env variable set. The value of `RECOVERY_MODE` determines the subdomain that is used to recreate | ||
the `aws-health-route` Route. | ||
|
||
=== Parameters | ||
|
||
[cols='1,3'] | ||
|=== | ||
| RECOVERY_MODE | Description | ||
|
||
| [.nowrap]`ACTIVE` | ||
| Recreates the `aws-health-route` Route with `primary.${DOMAIN}` URL and scales up the Infinispan and Keycloak operators. | ||
|
||
| [.nowrap]`PASSIVE` | ||
| Recreates the `aws-health-route` Route with `backup.${DOMAIN}` URL and scales up the Infinispan and Keycloak operators. | ||
|=== | ||
|
||
|
||
`DOMAIN` :: *Required*. The Route53 domain hosting the `client.`, `primary.` and `backup.` subdomains. | ||
|
||
=== Execution | ||
|
||
[source,bash] | ||
---- | ||
RECOVERY_MODE=ACTIVE DOMAIN=... ./kc-failover.sh | ||
---- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters