Skip to content

Commit

Permalink
pacemaker: add new resource agents for ptp and ntp status
Browse files Browse the repository at this point in the history
It might be useful to be able to locate guests based on the PTP synchronization status of the host (if a host loses sync, we may prefer to migrate the vm).
For that, this commit adds two resource agents.

Signed-off-by: Florent CARLI <florent.carli@rte-france.com>
  • Loading branch information
insatomcat committed Aug 17, 2023
1 parent 0b8c8c3 commit 7afa2f6
Show file tree
Hide file tree
Showing 4 changed files with 618 additions and 5 deletions.
12 changes: 7 additions & 5 deletions roles/debian/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,13 @@
state: directory
mode: '0755'

- name: Copy VirtualDomain file
ansible.builtin.copy:
src: ../src/debian/VirtualDomain
dest: /usr/lib/ocf/resource.d/seapath/VirtualDomain
mode: '0755'
- name: Copy Pacemaker Seapath Resource-Agent files
ansible.posix.synchronize:
src: ../src/debian/pacemaker_ra/
dest: /usr/lib/ocf/resource.d/seapath/
rsync_opts:
- "--chmod=F755"
- "--chown=root:root"

- name: create /var/log/syslog-ng folder on hosts
file:
Expand Down
File renamed without changes.
314 changes: 314 additions & 0 deletions src/debian/pacemaker_ra/ntpstatus
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
#!/bin/sh
#
# ocf:seapath:ntpstatus resource agent
#
# Original copyright 2004 SUSE LINUX AG, Lars Marowsky-Brée
# Later changes copyright 2008-2019 the Pacemaker project contributors
#
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU General Public License version 2
# (GPLv2) WITHOUT ANY WARRANTY.
#
# crm config example:
#primitive ntpstatus_test ocf:seapath:ntpstatus \
# op monitor timeout=10 interval=10
#clone cl_ntpstatus_test ntpstatus_test \
# meta target-role=Started
#location ntp_test_debian debian \
# rule ntpstatus: defined ntpstatus
#
#######################################################################
# Initialization:

# Path of the OCF shell function library; an OCF_FUNCTIONS value already
# present in the environment takes precedence over the computed default.
if [ -z "${OCF_FUNCTIONS}" ]; then
    OCF_FUNCTIONS="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
fi
. "${OCF_FUNCTIONS}"

# The action to run is the first command-line argument unless the caller
# already provided __OCF_ACTION.
if [ -z "${__OCF_ACTION}" ]; then
    __OCF_ACTION="$1"
fi

#######################################################################

# Print the agent's OCF metadata (XML) on stdout.
#
# Globals:  HA_VARRUN, OCF_RESOURCE_INSTANCE (read; expanded into the
#           advertised default of the "state" parameter)
# Outputs:  the resource-agent XML document
#
# The defaults advertised here are kept in sync with the defaults the
# script applies to OCF_RESKEY_ntpstatus, OCF_RESKEY_dampen and
# OCF_RESKEY_multiplier.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ntpstatus" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Checks the status of the connectivity to a ntp server
</longdesc>
<shortdesc lang="en">Checks the status of the connectivity to a ntp server</shortdesc>
<parameters>
<parameter name="state" unique="1">
<longdesc lang="en">
Location to store the resource state in.
</longdesc>
<shortdesc lang="en">State file</shortdesc>
<content type="string" default="${HA_VARRUN%%/}/ntpstatus-${OCF_RESOURCE_INSTANCE}.state" />
</parameter>
<parameter name="ntpstatus" unique="1">
<longdesc lang="en">
Name of the node attribute in which the ntp status is published.
This is the name to use in location rules.
</longdesc>
<shortdesc lang="en">Attribute name</shortdesc>
<content type="string" default="ntpstatus" />
</parameter>
<parameter name="dampen" unique="0">
<longdesc lang="en">
Dampening delay passed to attrd_updater (-d) when the attribute is updated or deleted
</longdesc>
<shortdesc lang="en">Dampening interval</shortdesc>
<content type="string" default="5" />
</parameter>
<parameter name="op_sleep" unique="1">
<longdesc lang="en">
Number of seconds to sleep during operations. This can be used to test how
the cluster reacts to operation timeouts.
</longdesc>
<shortdesc lang="en">Operation sleep duration in seconds.</shortdesc>
<content type="string" default="0" />
</parameter>
<parameter name="fail_start_on" unique="0">
<longdesc lang="en">
Start actions will return failure if running on the host specified here, but
the resource will start successfully anyway (future monitor calls will find it
running). This can be used to test on-fail=ignore.
</longdesc>
<shortdesc lang="en">Report bogus start failure on specified host</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="envfile" unique="1">
<longdesc lang="en">
If this is set, the environment will be dumped to this file for every call.
</longdesc>
<shortdesc lang="en">Environment dump file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="multiplier" unique="0">
<longdesc lang="en">
The number by which to multiply the connectivity (0 or 1)
</longdesc>
<shortdesc lang="en">Value multiplier</shortdesc>
<content type="integer" default="1000"/>
</parameter>
<parameter name="host_ip" unique="0" required="1">
<longdesc lang="en">
ip address to check the connectivity to
</longdesc>
<shortdesc lang="en">Host IP</shortdesc>
<content type="string" default=""/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" timeout="20s" interval="10s" depth="0"/>
<action name="reload" timeout="20s" />
<action name="migrate_to" timeout="20s" />
<action name="migrate_from" timeout="20s" />
<action name="validate-all" timeout="20s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}

#######################################################################

# TERM is deliberately survived: this exercises pacemaker-execd's own
# enforcement that the agent really goes away. The handler only logs the
# event and cleans up.
sigterm_handler() {
    ocf_log info "They use TERM to bring us down. No such luck."

    # A KILL is likely to follow. Remove any in-progress monitor
    # serialization marker now, so the next probe doesn't return an error.
    rm -f "${VERIFY_SERIALIZED_FILE}"
    return
}
trap sigterm_handler TERM

# Print a two-line usage summary on stdout.
ntpstatus_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}

# Append a snapshot of the environment to the file named by
# OCF_RESKEY_envfile; do nothing when that parameter is empty.
#
# Each snapshot is a header line carrying the action and the current date,
# the sorted output of env, and a closing "###" line.
dump_env() {
    [ -n "${OCF_RESKEY_envfile}" ] || return 0

    {
        echo "### ${__OCF_ACTION} @ $(date) ###"
        echo "$(env | sort)"
        echo "###"
    } >> "${OCF_RESKEY_envfile}"
}

# Compute the NTP status of the configured source and publish it as a node
# attribute through attrd_updater.
#
# Globals read:    OCF_RESKEY_host_ip, OCF_RESKEY_multiplier,
#                  OCF_RESKEY_ntpstatus, OCF_RESKEY_dampen, __OCF_ACTION,
#                  attrd_options (optional extra attrd_updater flags)
# Globals written: status (published value), rc (attrd_updater exit code)
# Returns:         attrd_updater's exit code, or OCF_ERR_CONFIGURED when
#                  the multiplier is not an integer
#
# The status is derived from the 5th column (reach) of `chronyc sources`:
# reach is an octal register whose lowest bit records the last contact.
#   1 --> last contact fine      (published value: multiplier)
#   0 --> last contact problem   (published value: 0)
# A source that is not listed at all is reported as 0.
ntpstatus_update() {
    case "${OCF_RESKEY_multiplier#-}" in
        ''|*[!0-9]*)
            ocf_log err "Invalid multiplier '$OCF_RESKEY_multiplier': expected an integer"
            return $OCF_ERR_CONFIGURED
            ;;
    esac

    # Select the source by exact, literal comparison with the name/address
    # column, so that dots are not regex wildcards and 10.0.0.1 does not
    # also select 10.0.0.10. Appending "" forces a string comparison.
    ntp_reach=$(/usr/bin/chronyc sources |
        awk -v host="$OCF_RESKEY_host_ip" '($2 "") == (host "") { print $5; exit }')

    case "$ntp_reach" in
        ''|*[!0-7]*)
            # Source missing or reach not parsable: no known good contact.
            status=0
            ;;
        *)
            # The lowest bit of an octal number is the parity of its last
            # digit.
            ntp_last_digit=${ntp_reach#"${ntp_reach%?}"}
            if [ $((ntp_last_digit % 2)) -eq 1 ]; then
                status=$OCF_RESKEY_multiplier
            else
                status=0
            fi
            ;;
    esac

    # On start the original agent uses -B rather than -v (per the
    # attrd_updater manual, -B updates both the value and the dampening).
    if [ "$__OCF_ACTION" = "start" ]; then
        ntp_value_flag=-B
    else
        ntp_value_flag=-v
    fi

    # attrd_options is intentionally unquoted: it is a list of extra flags.
    # shellcheck disable=SC2086
    attrd_updater -n "$OCF_RESKEY_ntpstatus" "$ntp_value_flag" "$status" \
        -d "$OCF_RESKEY_dampen" $attrd_options
    rc=$?

    if [ "$rc" -eq 0 ]; then
        ocf_log info "Updated $OCF_RESKEY_ntpstatus = $status"
    else
        ocf_log warn "Could not update $OCF_RESKEY_ntpstatus = $status: rc=$rc"
    fi
    return $rc
}

# Start action.
#
# If the monitor already reports success, nothing is created. Otherwise the
# state file is touched and the attribute is published with
# ntpstatus_update. In both cases the result is forced to OCF_ERR_GENERIC
# when this node's name (uname -n) equals OCF_RESKEY_fail_start_on.
#
# Returns: the monitor result (already running) or the status of touch
#          (fresh start), subject to the fail_start_on override.
ntpstatus_start() {
    ntpstatus_monitor
    start_rc=$?

    start_was_running=yes
    if [ $start_rc -ne $OCF_SUCCESS ]; then
        start_was_running=no
        touch "${OCF_RESKEY_state}"
        start_rc=$?
    fi

    # Bogus start failure requested for this host.
    if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then
        start_rc=$OCF_ERR_GENERIC
    fi

    # Publish the attribute only on a fresh start.
    if [ "$start_was_running" = no ]; then
        ntpstatus_update
    fi
    return $start_rc
}

# Stop action: delete the node attribute and remove the state file.
#
# Globals read: OCF_RESKEY_ntpstatus, OCF_RESKEY_dampen, attrd_options,
#               OCF_RESKEY_state, VERIFY_SERIALIZED_FILE
# Returns:      OCF_SUCCESS
#
# The state file is removed unconditionally. Removal used to depend on the
# exit status of attrd_updater, so a failed attribute deletion left the
# state file behind while stop still reported success, and later monitors
# kept seeing the resource as running.
ntpstatus_stop() {
    # Forced monitor, kept from the original flow (it honours op_sleep).
    ntpstatus_monitor --force

    # attrd_options is intentionally unquoted: it is a list of extra flags.
    # shellcheck disable=SC2086
    if ! attrd_updater -D -n "$OCF_RESKEY_ntpstatus" -d "$OCF_RESKEY_dampen" $attrd_options; then
        ocf_log warn "Could not delete attribute $OCF_RESKEY_ntpstatus"
    fi

    # -f: stopping an already stopped resource must stay silent.
    rm -f "${OCF_RESKEY_state}"
    rm -f "${VERIFY_SERIALIZED_FILE}"
    return $OCF_SUCCESS
}

# Monitor action.
#
# Arguments: $1 - any non-empty value (stop passes --force) skips the
#                 serialization collision check
# Globals read: OCF_RESKEY_op_sleep, VERIFY_SERIALIZED_FILE,
#               OCF_RESKEY_state, OCF_CHECK_LEVEL
# Returns: OCF_NOT_RUNNING when the state file is absent; OCF_SUCCESS when
#          it is present; OCF_ERR_GENERIC (or, at check level 40, the code
#          read from the state file) for the test-only failure levels.
#
# When the state file exists the node attribute is refreshed through
# ntpstatus_update before the check level is evaluated.
ntpstatus_monitor() {
    # ${...:-0}: an unset or empty op_sleep means "no sleep" instead of a
    # test(1) syntax error.
    if [ "${OCF_RESKEY_op_sleep:-0}" -ne 0 ]; then
        if [ -z "$1" ] && [ -f "${VERIFY_SERIALIZED_FILE}" ]; then
            # two monitor ops have occurred at the same time.
            # This verifies a condition in pacemaker-execd regression tests.
            ocf_log err "$VERIFY_SERIALIZED_FILE exists already"
            ocf_exit_reason "alternate universe collision"
            return $OCF_ERR_GENERIC
        fi

        touch "${VERIFY_SERIALIZED_FILE}"
        sleep "${OCF_RESKEY_op_sleep}"
        rm "${VERIFY_SERIALIZED_FILE}"
    fi

    if [ ! -f "${OCF_RESKEY_state}" ]; then
        return $OCF_NOT_RUNNING
    fi

    ntpstatus_update

    # Multiple monitor levels are defined to support various tests
    case "$OCF_CHECK_LEVEL" in
        10)
            # monitor level with delay, useful for testing timeouts
            sleep 30
            ;;

        20)
            # monitor level that fails intermittently
            # (expr, not $(( )): od prints zero-padded digits that must be
            # read as decimal, not octal)
            n=$(expr "$(dd if=/dev/urandom bs=1 count=1 2>/dev/null | od | head -1 | cut -f2 -d' ')" % 5)
            if [ "$n" -eq 1 ]; then
                ocf_exit_reason "smoke detected near CPU fan"
                return $OCF_ERR_GENERIC
            fi
            ;;

        30)
            # monitor level that always fails
            ocf_exit_reason "hyperdrive quota reached"
            return $OCF_ERR_GENERIC
            ;;

        40)
            # monitor level that returns error code from state file
            # (quoted so that a state path containing whitespace is read)
            rc=$(cat "${OCF_RESKEY_state}")
            if [ -n "$rc" ]; then
                ocf_exit_reason "CPU ejected. Observed leaving the Kronosnet galaxy at $rc times the speed of light."
                return $rc
            fi
            ;;

        *)
            ;;
    esac
    return $OCF_SUCCESS
}

# validate-all action: check the configuration without touching anything.
#
# Globals read: OCF_RESKEY_state, OCF_RESKEY_host_ip
# Returns: OCF_ERR_ARGS       when the directory holding the state file is
#                             missing, not writable or not searchable
#          OCF_ERR_CONFIGURED when host_ip is empty
#          OCF_SUCCESS        otherwise
#
# Every outcome is reported with return (never exit), so the caller keeps
# control and can log the result like for any other action.
ntpstatus_validate() {
    # Is the state directory writable?
    state_dir=$(dirname "$OCF_RESKEY_state")
    if [ ! -d "$state_dir" ] || [ ! -w "$state_dir" ] || [ ! -x "$state_dir" ]; then
        ocf_log err "State directory $state_dir is not a writable directory"
        return $OCF_ERR_ARGS
    fi

    # Check the host ip
    if [ -z "$OCF_RESKEY_host_ip" ]; then
        ocf_log err "Empty host_ip. Please specify a host to check"
        return $OCF_ERR_CONFIGURED
    fi
    return $OCF_SUCCESS
}

# Defaults for parameters that were not supplied by the cluster.
: "${OCF_RESKEY_op_sleep:=0}"
: "${OCF_RESKEY_CRM_meta_interval:=0}"
: "${OCF_RESKEY_CRM_meta_globally_unique:=false}"
: "${OCF_RESKEY_ntpstatus:=ntpstatus}"
: "${OCF_RESKEY_dampen:=5}"
: "${OCF_RESKEY_multiplier:=1000}"

# Derive the state file location when none was configured.
if [ -z "$OCF_RESKEY_state" ]; then
    OCF_RESKEY_state="${HA_VARRUN%%/}/ntpstatus-${OCF_RESOURCE_INSTANCE}.state"

    if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then
        # Strip off the trailing clone marker (note + is not portable in sed).
        # The sed script is quoted so the shell neither globs it nor turns
        # "\." into an unescaped "." before sed sees it.
        OCF_RESKEY_state=$(printf '%s\n' "$OCF_RESKEY_state" |
            sed 's/:[0-9][0-9]*\.state$/.state/')
    fi
fi

# Marker file used by the monitor to detect overlapping executions.
VERIFY_SERIALIZED_FILE="${OCF_RESKEY_state}.serialized"

# Optional environment dump (see the envfile parameter), then dispatch.
dump_env

# Actions that only print text exit immediately; every other action falls
# through to the common epilogue, which logs and returns its result.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        ntpstatus_usage
        exit $OCF_SUCCESS
        ;;
    start)
        ntpstatus_start
        ;;
    stop)
        ntpstatus_stop
        ;;
    monitor)
        ntpstatus_monitor
        ;;
    validate-all)
        ntpstatus_validate
        ;;
    migrate_to)
        # Migrating away is a stop on this node.
        ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
        ntpstatus_stop
        ;;
    migrate_from)
        # Receiving a migration is a start on this node.
        ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}."
        ntpstatus_start
        ;;
    reload)
        ocf_log err "Reloading..."
        ntpstatus_start
        ;;
    *)
        ntpstatus_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc

# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80:
Loading

0 comments on commit 7afa2f6

Please sign in to comment.