#!/bin/sh
#
#
# LNet OCF RA
#
# License: GNU General Public License (GPL)v2
# Description: Manages ZFS and Lustre on a shared storage
# Written by: Gabriele Paciucci
# Release Date: 01 November 2016
# Release Version: 0.99.4
# Copyright (c) 2009 Andrew Beekhof
# Copyright (c) 2016, Intel Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#######################################################################
# Initialization:
# Locate and load the OCF shell function library, then record the requested
# action. Both variables may already be set by the caller; the defaults
# below apply only when they are unset.
: "${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}"
# Quoted so that a library path containing whitespace is sourced as one file.
. "${OCF_FUNCTIONS}"
# The action defaults to the first command line argument.
: "${__OCF_ACTION=$1}"
#######################################################################
# meta_data: emit the agent's metadata text (version plus long and short
# descriptions of each parameter) with cat; the dispatcher at the bottom of
# the file runs it for the meta-data action.
# NOTE(review): the here-document opener on the cat line and the markup
# around the description texts look truncated in this copy - compare with
# the upstream agent before editing or shipping this function.
meta_data() {
cat <
0.99.4
Every time the monitor action is run, this resource agent records (in the CIB)
the current number of lctl ping nodes the host can connect to.
LNet connectivity
PID file
PID file
The time to wait (dampening) further changes occur
Dampening interval
The name of the attributes to set. This is the name to be used in the constraints.
Attribute name
The number by which to multiply the number of connected ping nodes by
Value multiplier
The list of ping nodes to count.
Host list
Number of ping attempts, per host, before declaring it dead
no. of ping attempts
How long, in seconds, to wait before declaring a ping lost
ping timeout in seconds
Option to enable lctl ping. The default is true
Extra Options
Device used for the LNET network. We assume the same device accross the cluster
LNET device
A catch all for any other options that need to be passed to ping.
Extra Options
Resource is failed if the score is less than failure_score.
Default never fails.
failure_score
Enables to use default attrd_updater verbose logging on every call.
Verbose logging
END
}
#######################################################################
# Log a message through ocf_log only when debug logging is enabled.
#
# Globals:   OCF_RESKEY_debug (read) - the message is logged only when this
#            is the literal string "true"; level (written)
# Arguments: $1   - log level handed to ocf_log
#            $2.. - message words, joined into a single string
ping_conditional_log() {
    level=$1; shift
    # Quoted so that an unset or empty OCF_RESKEY_debug simply compares as
    # "not true" instead of making the test builtin fail with an error.
    if [ "${OCF_RESKEY_debug}" = "true" ]; then
        ocf_log "$level" "$*"
    fi
}
# ping_usage, fused with the tail of the lctl based host check.
# NOTE(review): this span looks damaged - the usage text and the start of
# the per-host loop (where the probe output and rc are captured) appear to
# have been lost, so only the result handling of the lctl check is visible.
# Restore the missing lines from the upstream agent before relying on it.
ping_usage() {
cat <&1`; rc=$?
# debug
# ocf_log info "$lctl_exe $host $OCF_RESKEY_timeout"
# Classify rc: 0 increments the active counter, 1 is reported through
# ping_conditional_log, any other value is logged as an error.
case $rc in
0) active=`expr $active + 1`;;
1) ping_conditional_log warn "$host is inactive: $lctl_out";;
# NOTE(review): the message below prints p_out while the branch above prints
# lctl_out - presumably lctl_out was intended here as well; confirm.
*) ocf_log err "Unexpected result for '$lctl_exe $host $OCF_RESKEY_timeout' $rc: $p_out";;
esac
done
# The value of the active counter is handed back as the exit status.
return $active
}
# Count the hosts in OCF_RESKEY_host_list that answer a ping.
#
# Globals:   OCF_RESKEY_host_list, OCF_RESKEY_timeout, OCF_RESKEY_attempts,
#            OCF_RESKEY_options (read);
#            active, host, p_exe, p_args, p_out, rc (written)
# Returns:   the number of hosts whose ping exited with status 0
# Exits:     with OCF_ERR_INSTALLED when uname reports neither Linux nor
#            Darwin
ping_check() {
    active=0
    for host in $OCF_RESKEY_host_list; do
        # The timeout flag differs between the two supported platforms.
        case $(uname) in
            Linux)
                p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts"
                ;;
            Darwin)
                p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o"
                ;;
            *)
                ocf_log err "Unknown host type: $(uname)"
                exit $OCF_ERR_INSTALLED
                ;;
        esac

        # A colon in the host selects ping6, everything else uses ping.
        case $host in
            *:*) p_exe=ping6 ;;
            *)   p_exe=ping ;;
        esac

        p_out=$($p_exe $p_args $OCF_RESKEY_options $host 2>&1)
        rc=$?

        if [ "$rc" -eq 0 ]; then
            active=$((active + 1))
        elif [ "$rc" -eq 1 ]; then
            ping_conditional_log warn "$host is inactive: $p_out"
        else
            ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out"
        fi
    done
    return $active
}
# Compute the connectivity score and publish it with attrd_updater.
#
# The link state of OCF_RESKEY_device is read from sysfs first. The remote
# hosts are probed (lctl_check or ping_check, selected by OCF_RESKEY_lctl)
# only when carrier is "1" and operstate is "up"; otherwise the number of
# reachable hosts is taken as 0 without any further test.
#
# Globals:   OCF_RESKEY_device, OCF_RESKEY_lctl, OCF_RESKEY_multiplier,
#            OCF_RESKEY_name, OCF_RESKEY_dampen, OCF_RESKEY_failure_score,
#            attrd_options (read);
#            CARRIER, OPERSTATE, CAR_STAT, OPER_STAT, active, score, rc
#            (written)
# Returns:   the attrd_updater exit status when the update failed,
#            1 when the score is below OCF_RESKEY_failure_score,
#            0 otherwise
ping_update() {
    CARRIER=/sys/class/net/$OCF_RESKEY_device/carrier
    OPERSTATE=/sys/class/net/$OCF_RESKEY_device/operstate
    # stderr is discarded on purpose: when the device does not exist, or the
    # kernel refuses the carrier read, the value stays empty and is treated
    # as "link down" below instead of leaking a raw cat error.
    CAR_STAT=$(cat "$CARRIER" 2>/dev/null)
    OPER_STAT=$(cat "$OPERSTATE" 2>/dev/null)

    # "=" rather than "==": this script runs under /bin/sh and "==" is not a
    # POSIX test operator, so shells such as dash reject it.
    if [ "$CAR_STAT" = "1" ] && [ "$OPER_STAT" = "up" ]; then
        # Both checks report the number of reachable hosts as exit status.
        if [ "${OCF_RESKEY_lctl}" = "true" ]; then
            lctl_check
            active=$?
        else
            ping_check
            active=$?
        fi
    else
        active=0
    fi

    score=$(expr "$active" \* "$OCF_RESKEY_multiplier")
    # attrd_options is left unquoted so that an empty value adds no argument.
    attrd_updater -n "$OCF_RESKEY_name" -v "$score" -d "$OCF_RESKEY_dampen" $attrd_options
    rc=$?
    case $rc in
        0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;;
        *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc" ;;
    esac
    if [ "$rc" -ne 0 ]; then
        return $rc
    fi

    if [ "$score" -eq 0 ]; then
        ocf_log err "LNet connection failed please check"
    fi

    # Two separate tests joined with && replace the deprecated -a operator.
    if [ -n "$OCF_RESKEY_failure_score" ] && [ "$score" -lt "$OCF_RESKEY_failure_score" ]; then
        ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)"
        return 1
    fi
    return 0
}
# Fallback values for parameters that are unset or empty.
OCF_RESKEY_name=${OCF_RESKEY_name:-pingd}
OCF_RESKEY_dampen=${OCF_RESKEY_dampen:-5s}
OCF_RESKEY_attempts=${OCF_RESKEY_attempts:-3}
OCF_RESKEY_multiplier=${OCF_RESKEY_multiplier:-1}
OCF_RESKEY_debug=${OCF_RESKEY_debug:-false}
OCF_RESKEY_lctl=${OCF_RESKEY_lctl:-true}
# A fallback of eth1 for OCF_RESKEY_device is left disabled:
#OCF_RESKEY_device=${OCF_RESKEY_device:-eth1}
OCF_RESKEY_failure_score=${OCF_RESKEY_failure_score:-0}
OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout:-20000}
OCF_RESKEY_CRM_meta_globally_unique=${OCF_RESKEY_CRM_meta_globally_unique:-true}
# Normalise OCF_RESKEY_timeout to a whole number of seconds.
# The digits are extracted first and then scaled by the unit suffix
# (ms/msec, m/min, h/hr); any other form keeps the bare number.
# grep -E replaces egrep, which current GNU grep flags as obsolescent.
integer=$(printf '%s\n' "${OCF_RESKEY_timeout}" | grep -E -o '[0-9]*')
case ${OCF_RESKEY_timeout} in
    *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=$(expr "$integer" / 1000) ;;
    *[0-9]m|*[0-9]min)   OCF_RESKEY_timeout=$(expr "$integer" \* 60) ;;
    *[0-9]h|*[0-9]hr)    OCF_RESKEY_timeout=$(expr "$integer" \* 60 \* 60) ;;
    *)                   OCF_RESKEY_timeout=$integer ;;
esac

# Without an explicit timeout, derive one from the operation timeout
# (milliseconds) shared across all hosts and attempts, or fall back to 5.
if [ -z "${OCF_RESKEY_timeout}" ]; then
    if [ x"$OCF_RESKEY_host_list" != x ]; then
        host_count=$(echo $OCF_RESKEY_host_list | awk '{print NF}')
        OCF_RESKEY_timeout=$(expr "$OCF_RESKEY_CRM_meta_timeout" / "$host_count" / "$OCF_RESKEY_attempts")
        # Convert to seconds and finish 10% early
        OCF_RESKEY_timeout=$(expr "$OCF_RESKEY_timeout" / 1100)
    else
        OCF_RESKEY_timeout=5
    fi
fi

# Clamp: anything below 1 becomes 5, anything above 1000 becomes 300.
if [ "${OCF_RESKEY_timeout}" -lt 1 ]; then
    OCF_RESKEY_timeout=5
elif [ "${OCF_RESKEY_timeout}" -gt 1000 ]; then
    # ping actually complains if this value is too high, 5 minutes is plenty
    OCF_RESKEY_timeout=300
fi
# Default PID file path: keyed on the attribute name when the resource is
# not globally unique, otherwise on the resource instance name. An explicit
# OCF_RESKEY_pidfile is left untouched.
# The operand is quoted so an unset value cannot break the test.
if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then
    : "${OCF_RESKEY_pidfile:=$HA_VARRUN/ping-${OCF_RESKEY_name}}"
else
    : "${OCF_RESKEY_pidfile:=$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}}"
fi
# attrd_updater is run with -q unless ocf_is_true accepts the debug value.
# NOTE(review): this is decided before the value is normalised below, so a
# value that ocf_is_true accepts but the case statement rejects would give
# verbose attrd_updater output with debug logging off - confirm intent.
if ocf_is_true ${OCF_RESKEY_debug}; then
    attrd_options=''
else
    attrd_options='-q'
fi

# Reduce the debug option to exactly "true" or "false"; anything that is
# not recognised is reported and treated as false.
case "${OCF_RESKEY_debug}" in
    true|True|TRUE|1)
        OCF_RESKEY_debug=true
        ;;
    false|False|FALSE|0)
        OCF_RESKEY_debug=false
        ;;
    *)
        ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}"
        OCF_RESKEY_debug=false
        ;;
esac
# Dispatch on the requested action. meta-data and usage/help exit with
# OCF_SUCCESS right away, an unknown action prints the usage text and exits
# with OCF_ERR_UNIMPLEMENTED; reload is handled exactly like start.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    start|reload)
        ping_start
        ;;
    stop)
        ping_stop
        ;;
    monitor)
        ping_monitor
        ;;
    validate-all)
        ping_validate
        ;;
    usage|help)
        ping_usage
        exit $OCF_SUCCESS
        ;;
    *)
        ping_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac