-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pacemaker: add new resource agents for ptp and ntp status
It might be useful to be able to locate guests based on the PTP synchronization status of the host (if a host loses sync, we may prefer to migrate the vm). For that, this commit adds two resource agents. Signed-off-by: Florent CARLI <florent.carli@rte-france.com>
- Loading branch information
1 parent
0b8c8c3
commit 7afa2f6
Showing
4 changed files
with
618 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,314 @@ | ||
#!/bin/sh | ||
# | ||
# ocf:seapath:ntpstatus resource agent | ||
# | ||
# Original copyright 2004 SUSE LINUX AG, Lars Marowsky-Br<E9>e | ||
# Later changes copyright 2008-2019 the Pacemaker project contributors | ||
# | ||
# The version control history for this file may have further details. | ||
# | ||
# This source code is licensed under the GNU General Public License version 2 | ||
# (GPLv2) WITHOUT ANY WARRANTY. | ||
# | ||
# crm config example: | ||
#primitive ntpstatus_test ocf:seapath:ntpstatus \ | ||
# op monitor timeout=10 interval=10 | ||
#clone cl_ntpstatus_test ntpstatus_test \ | ||
# meta target-role=Started | ||
#location ntp_test_debian debian \ | ||
# rule ntpstatus: defined ntpstatus | ||
# | ||
####################################################################### | ||
# Initialization: | ||
|
||
: ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} | ||
. "${OCF_FUNCTIONS}" | ||
: ${__OCF_ACTION:="$1"} | ||
|
||
####################################################################### | ||
|
||
meta_data() { | ||
cat <<END | ||
<?xml version="1.0"?> | ||
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> | ||
<resource-agent name="ntpstatus" version="1.0"> | ||
<version>1.0</version> | ||
<longdesc lang="en"> | ||
Checks the status of the connectivity to a ntp server | ||
</longdesc> | ||
<shortdesc lang="en">Checks the status of the connectivity to a ntp server</shortdesc> | ||
<parameters> | ||
<parameter name="state" unique="1"> | ||
<longdesc lang="en"> | ||
Location to store the resource state in. | ||
</longdesc> | ||
<shortdesc lang="en">State file</shortdesc> | ||
<content type="string" default="${HA_VARRUN%%/}/ntpstatus-${OCF_RESOURCE_INSTANCE}.state" /> | ||
</parameter> | ||
<parameter name="ntpstatus" unique="1"> | ||
<longdesc lang="en"> | ||
ntpstatus | ||
</longdesc> | ||
<shortdesc lang="en">ntpstatus</shortdesc> | ||
<content type="string" default="" /> | ||
</parameter> | ||
<parameter name="op_sleep" unique="1"> | ||
<longdesc lang="en"> | ||
Number of seconds to sleep during operations. This can be used to test how | ||
the cluster reacts to operation timeouts. | ||
</longdesc> | ||
<shortdesc lang="en">Operation sleep duration in seconds.</shortdesc> | ||
<content type="string" default="0" /> | ||
</parameter> | ||
<parameter name="fail_start_on" unique="0"> | ||
<longdesc lang="en"> | ||
Start actions will return failure if running on the host specified here, but | ||
the resource will start successfully anyway (future monitor calls will find it | ||
running). This can be used to test on-fail=ignore. | ||
</longdesc> | ||
<shortdesc lang="en">Report bogus start failure on specified host</shortdesc> | ||
<content type="string" default="" /> | ||
</parameter> | ||
<parameter name="envfile" unique="1"> | ||
<longdesc lang="en"> | ||
If this is set, the environment will be dumped to this file for every call. | ||
</longdesc> | ||
<shortdesc lang="en">Environment dump file</shortdesc> | ||
<content type="string" default="" /> | ||
</parameter> | ||
<parameter name="multiplier" unique="0"> | ||
<longdesc lang="en"> | ||
The number by which to multiply the connectivity (0 or 1) by | ||
</longdesc> | ||
<shortdesc lang="en">Value multiplier</shortdesc> | ||
<content type="integer" default="1"/> | ||
</parameter> | ||
<parameter name="host_ip" unique="0" required="1"> | ||
<longdesc lang="en"> | ||
ip address to check the connectivity to | ||
</longdesc> | ||
<shortdesc lang="en">Host IP</shortdesc> | ||
<content type="string" default=""/> | ||
</parameter> | ||
</parameters> | ||
<actions> | ||
<action name="start" timeout="20s" /> | ||
<action name="stop" timeout="20s" /> | ||
<action name="monitor" timeout="20s" interval="10s" depth="0"/> | ||
<action name="reload" timeout="20s" /> | ||
<action name="migrate_to" timeout="20s" /> | ||
<action name="migrate_from" timeout="20s" /> | ||
<action name="validate-all" timeout="20s" /> | ||
<action name="meta-data" timeout="5s" /> | ||
</actions> | ||
</resource-agent> | ||
END | ||
} | ||
|
||
####################################################################### | ||
|
||
# don't exit on TERM, to test that pacemaker-execd makes sure that we do exit | ||
trap sigterm_handler TERM | ||
sigterm_handler() { | ||
ocf_log info "They use TERM to bring us down. No such luck." | ||
|
||
# Since we're likely going to get KILLed, clean up any monitor | ||
# serialization in progress, so the next probe doesn't return an error. | ||
rm -f "${VERIFY_SERIALIZED_FILE}" | ||
return | ||
} | ||
|
||
ntpstatus_usage() { | ||
cat <<END | ||
usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data} | ||
Expects to have a fully populated OCF RA-compliant environment set. | ||
END | ||
} | ||
|
||
dump_env() { | ||
if [ "${OCF_RESKEY_envfile}" != "" ]; then | ||
echo "### ${__OCF_ACTION} @ $(date) ### | ||
$(env | sort) | ||
###" >> "${OCF_RESKEY_envfile}" | ||
fi | ||
} | ||
|
||
ntpstatus_update() { | ||
# get the 5th column from chrony sources output (reach), convert from octal to binary and get the last digit (last contact with server) | ||
# 1 --> last contact fine | ||
# 0 --> last contact problem | ||
status=`/usr/bin/chronyc sources | grep -E "$OCF_RESKEY_host_ip" | awk '{print $5}' | python3 -c "import sys;print(\"{0:b}\".format(int(input(),8))[-1])"` | ||
status=$(expr $status \* $OCF_RESKEY_multiplier) | ||
if [ "$__OCF_ACTION" = "start" ] ; then | ||
attrd_updater -n "$OCF_RESKEY_ntpstatus" -B "$status" -d "$OCF_RESKEY_dampen" $attrd_options | ||
else | ||
attrd_updater -n "$OCF_RESKEY_ntpstatus" -v "$status" -d "$OCF_RESKEY_dampen" $attrd_options | ||
fi | ||
rc=$? | ||
case $rc in | ||
0) ocf_log info "Updated $OCF_RESKEY_ntpstatus = $status" ;; | ||
*) ocf_log warn "Could not update $OCF_RESKEY_ntpstatus = $status: rc=$rc";; | ||
esac | ||
if [ $rc -ne 0 ]; then | ||
return $rc | ||
fi | ||
} | ||
|
||
ntpstatus_start() { | ||
ntpstatus_monitor | ||
|
||
DS_RETVAL=$? | ||
if [ $DS_RETVAL -eq $OCF_SUCCESS ]; then | ||
if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then | ||
DS_RETVAL=$OCF_ERR_GENERIC | ||
fi | ||
return $DS_RETVAL | ||
fi | ||
|
||
touch "${OCF_RESKEY_state}" | ||
DS_RETVAL=$? | ||
if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then | ||
DS_RETVAL=$OCF_ERR_GENERIC | ||
fi | ||
ntpstatus_update | ||
return $DS_RETVAL | ||
} | ||
|
||
ntpstatus_stop() { | ||
ntpstatus_monitor --force | ||
attrd_updater -D -n "$OCF_RESKEY_ntpstatus" -d "$OCF_RESKEY_dampen" $attrd_options | ||
if [ $? -eq $OCF_SUCCESS ]; then | ||
rm "${OCF_RESKEY_state}" | ||
fi | ||
rm -f "${VERIFY_SERIALIZED_FILE}" | ||
return $OCF_SUCCESS | ||
} | ||
|
||
ntpstatus_monitor() { | ||
if [ $OCF_RESKEY_op_sleep -ne 0 ]; then | ||
if [ "$1" = "" ] && [ -f "${VERIFY_SERIALIZED_FILE}" ]; then | ||
# two monitor ops have occurred at the same time. | ||
# This verifies a condition in pacemaker-execd regression tests. | ||
ocf_log err "$VERIFY_SERIALIZED_FILE exists already" | ||
ocf_exit_reason "alternate universe collision" | ||
return $OCF_ERR_GENERIC | ||
fi | ||
|
||
touch "${VERIFY_SERIALIZED_FILE}" | ||
sleep ${OCF_RESKEY_op_sleep} | ||
rm "${VERIFY_SERIALIZED_FILE}" | ||
fi | ||
|
||
if [ -f "${OCF_RESKEY_state}" ]; then | ||
ntpstatus_update | ||
# Multiple monitor levels are defined to support various tests | ||
case "$OCF_CHECK_LEVEL" in | ||
10) | ||
# monitor level with delay, useful for testing timeouts | ||
sleep 30 | ||
;; | ||
|
||
20) | ||
# monitor level that fails intermittently | ||
n=$(expr "$(dd if=/dev/urandom bs=1 count=1 2>/dev/null | od | head -1 | cut -f2 -d' ')" % 5) | ||
if [ $n -eq 1 ]; then | ||
ocf_exit_reason "smoke detected near CPU fan" | ||
return $OCF_ERR_GENERIC | ||
fi | ||
;; | ||
|
||
30) | ||
# monitor level that always fails | ||
ocf_exit_reason "hyperdrive quota reached" | ||
return $OCF_ERR_GENERIC | ||
;; | ||
|
||
40) | ||
# monitor level that returns error code from state file | ||
rc=$(cat ${OCF_RESKEY_state}) | ||
[ -n "$rc" ] && ocf_exit_reason "CPU ejected. Observed leaving the Kronosnet galaxy at $rc times the speed of light." && return $rc | ||
;; | ||
|
||
*) | ||
;; | ||
esac | ||
return $OCF_SUCCESS | ||
fi | ||
return $OCF_NOT_RUNNING | ||
} | ||
|
||
ntpstatus_validate() { | ||
# Is the state directory writable? | ||
state_dir=$(dirname "$OCF_RESKEY_state") | ||
[ -d "$state_dir" ] && [ -w "$state_dir" ] && [ -x "$state_dir" ] | ||
if [ $? -ne 0 ]; then | ||
return $OCF_ERR_ARGS | ||
fi | ||
|
||
# Check the host ip | ||
if [ -z "$OCF_RESKEY_host_ip" ]; then | ||
ocf_log err "Empty host_ip. Please specify a host to check" | ||
exit $OCF_ERR_CONFIGURED | ||
fi | ||
return $OCF_SUCCESS | ||
} | ||
|
||
: ${OCF_RESKEY_op_sleep:=0} | ||
: ${OCF_RESKEY_CRM_meta_interval:=0} | ||
: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} | ||
: ${OCF_RESKEY_ntpstatus:="ntpstatus"} | ||
: ${OCF_RESKEY_dampen:=5} | ||
: ${OCF_RESKEY_multiplier:=1000} | ||
|
||
if [ -z "$OCF_RESKEY_state" ]; then | ||
OCF_RESKEY_state="${HA_VARRUN%%/}/ntpstatus-${OCF_RESOURCE_INSTANCE}.state" | ||
|
||
if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then | ||
# Strip off the trailing clone marker (note + is not portable in sed) | ||
OCF_RESKEY_state=$(echo $OCF_RESKEY_state | sed s/:[0-9][0-9]*\.state/.state/) | ||
fi | ||
fi | ||
VERIFY_SERIALIZED_FILE="${OCF_RESKEY_state}.serialized" | ||
|
||
dump_env | ||
|
||
case "$__OCF_ACTION" in | ||
meta-data) meta_data | ||
exit $OCF_SUCCESS | ||
;; | ||
start) ntpstatus_start;; | ||
stop) ntpstatus_stop;; | ||
monitor) ntpstatus_monitor;; | ||
migrate_to) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}." | ||
ntpstatus_stop | ||
;; | ||
migrate_from) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}." | ||
ntpstatus_start | ||
;; | ||
reload) ocf_log err "Reloading..." | ||
ntpstatus_start | ||
;; | ||
validate-all) ntpstatus_validate;; | ||
usage|help) ntpstatus_usage | ||
exit $OCF_SUCCESS | ||
;; | ||
*) ntpstatus_usage | ||
exit $OCF_ERR_UNIMPLEMENTED | ||
;; | ||
esac | ||
rc=$? | ||
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" | ||
exit $rc | ||
|
||
# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: |
Oops, something went wrong.