From f5530a0faa24ad836a44bdd8d0ce86bf806fde87 Mon Sep 17 00:00:00 2001 From: Nathaniel Clark Date: Tue, 7 Feb 2017 08:55:06 -0500 Subject: [PATCH] LU-8457 pacemaker: Update healthLNET to 0.99.4 Fixed minor issue with lnet connectivity Fix License header. Test-Parameters: trivial Signed-off-by: Nathaniel Clark Change-Id: I074b93e2e3ea29e608a6f1b46600556a1b255438 Reviewed-on: https://review.whamcloud.com/25297 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Malcolm Cowe --- contrib/scripts/pacemaker/healthLNET | 226 ++++++++++++++++++----------------- 1 file changed, 115 insertions(+), 111 deletions(-) mode change 100644 => 100755 contrib/scripts/pacemaker/healthLNET diff --git a/contrib/scripts/pacemaker/healthLNET b/contrib/scripts/pacemaker/healthLNET old mode 100644 new mode 100755 index fc958c9..4676026 --- a/contrib/scripts/pacemaker/healthLNET +++ b/contrib/scripts/pacemaker/healthLNET @@ -4,15 +4,17 @@ # LNet OCF RA # + # License: GNU General Public License (GPL)v2 # Description: Manages ZFS and Lustre on a shared storage # Written by: Gabriele Paciucci -# Release Date: 01 September 2016 -# Release Version: 0.99 +# Release Date: 01 November 2016 +# Release Version: 0.99.4 # Copyright (c) 2009 Andrew Beekhof # Copyright (c) 2016, Intel Corporation + # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as @@ -30,8 +32,7 @@ # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write the Free Software Foundation, -# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ####################################################################### @@ -48,7 +49,7 @@ meta_data() { -<version>0.99</version> +<version>0.99.4</version> Every time the monitor action is run, this resource agent records (in the CIB) @@ -182,7 +183,7 @@ ping_usage() { usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data} Expects to have a fully populated OCF RA-compliant environment set. - END +END } ping_start() { @@ -196,9 +197,9 @@ ping_start() { ping_stop() { - rm -f ${OCF_RESKEY_pidfile} - attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen $attrd_options - return $OCF_SUCCESS + rm -f ${OCF_RESKEY_pidfile} + attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen $attrd_options + return $OCF_SUCCESS } ping_monitor() { @@ -213,14 +214,14 @@ ping_monitor() { } ping_validate() { - # Is the state directory writable? - state_dir=`dirname "$OCF_RESKEY_pidfile"` - touch "$state_dir/$$" - if [ $? != 0 ]; then - ocf_log err "Invalid location for 'state': $state_dir is not writable" - return $OCF_ERR_ARGS - fi - rm "$state_dir/$$" + # Is the state directory writable? + state_dir=`dirname "$OCF_RESKEY_pidfile"` + touch "$state_dir/$$" + if [ $? != 0 ]; then + ocf_log err "Invalid location for 'state': $state_dir is not writable" + return $OCF_ERR_ARGS + fi + rm "$state_dir/$$" # Pidfile better be an absolute path case $OCF_RESKEY_pidfile in @@ -229,32 +230,32 @@ ping_validate() { esac # Check the host list - if [ "x" = "x$OCF_RESKEY_host_list" ]; then - ocf_log err "Empty host_list. Please specify some nodes to ping" - exit $OCF_ERR_CONFIGURED - fi + if [ "x" = "x$OCF_RESKEY_host_list" ]; then + ocf_log err "Empty host_list. 
Please specify some nodes to ping" + exit $OCF_ERR_CONFIGURED + fi - check_binary ping + check_binary ping - return $OCF_SUCCESS + return $OCF_SUCCESS } lctl_check() { - active=0 - for host in $OCF_RESKEY_host_list; do - lctl_exe="lctl ping" + active=0 + for host in $OCF_RESKEY_host_list; do + lctl_exe="lctl ping" - lctl_out=`$lctl_exe $host $OCF_RESKEY_timeout 2>&1`; rc=$? - # debug - # ocf_log info "$lctl_exe $host $OCF_RESKEY_timeout" + lctl_out=`$lctl_exe $host $OCF_RESKEY_timeout 2>&1`; rc=$? + # debug + # ocf_log info "$lctl_exe $host $OCF_RESKEY_timeout" - case $rc in - 0) active=`expr $active + 1`;; - 1) ping_conditional_log warn "$host is inactive: $lctl_out";; - *) ocf_log err "Unexpected result for '$lctl_exe $host $OCF_RESKEY_timeout' $rc: $p_out";; + case $rc in + 0) active=`expr $active + 1`;; + 1) ping_conditional_log warn "$host is inactive: $lctl_out";; + *) ocf_log err "Unexpected result for '$lctl_exe $host $OCF_RESKEY_timeout' $rc: $p_out";; esac - done - return $active + done + return $active } @@ -262,76 +263,79 @@ lctl_check() { ping_check() { - active=0 - for host in $OCF_RESKEY_host_list; do - p_exe=ping - - case `uname` in - Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";; - Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; - *) ocf_log err "Unknown host type: `uname`"; exit $OCF_ERR_INSTALLED;; - esac - - case $host in - *:*) p_exe=ping6 - esac - - p_out=`$p_exe $p_args $OCF_RESKEY_options $host 2>&1`; rc=$? 
- - case $rc in - 0) active=`expr $active + 1`;; - 1) ping_conditional_log warn "$host is inactive: $p_out";; - *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";; - esac - done - return $active + active=0 + for host in $OCF_RESKEY_host_list; do + p_exe=ping + + case `uname` in + Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";; + Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; + *) ocf_log err "Unknown host type: `uname`"; exit $OCF_ERR_INSTALLED;; + esac + + case $host in + *:*) p_exe=ping6 + esac + + p_out=`$p_exe $p_args $OCF_RESKEY_options $host 2>&1`; rc=$? + + case $rc in + 0) active=`expr $active + 1`;; + 1) ping_conditional_log warn "$host is inactive: $p_out";; + *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";; + esac + done + return $active } ping_update() { - # first I'm testing if I have the physical link up. - # If not I give up without any additional tests. - # but first we need to find which is the device we are using on the localhost. + # first I'm testing if I have the physical link up. + # If not I give up without any additional tests. + # but first we need to find which is the device we are using on the localhost. - CARRIER=/sys/class/net/$OCF_RESKEY_device/carrier - OPERSTATE=/sys/class/net/$OCF_RESKEY_device/operstate + CARRIER=/sys/class/net/$OCF_RESKEY_device/carrier + OPERSTATE=/sys/class/net/$OCF_RESKEY_device/operstate - CAR_STAT=$(cat $CARRIER) - OPER_STAT=$(cat $OPERSTATE) + CAR_STAT=$(cat $CARRIER) + OPER_STAT=$(cat $OPERSTATE) + + # debug + # ocf_log info "$CAR_STAT - $OPER_STAT" - # debug - # ocf_log info "$CAR_STAT - $OPER_STAT" - if [ "$CAR_STAT" == "1" ] && [ "$OPER_STAT" == "up" ]; then - if [ ${OCF_RESKEY_lctl} = "true" ]; then - lctl_check - active=$? + if [ "$CAR_STAT" == "1" ] && [ "$OPER_STAT" == "up" ]; then + if [ ${OCF_RESKEY_lctl} = "true" ]; then + lctl_check + active=$? 
+ else + ping_check + active=$? + fi else - ping_check - active=$? + active=0 fi - else - active=0 - fi - - # debug - # ocf_log info "$active" - score=`expr $active \* $OCF_RESKEY_multiplier` - attrd_updater -n $OCF_RESKEY_name -v $score -d $OCF_RESKEY_dampen $attrd_options - rc=$? - case $rc in - 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; - *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; - esac - if [ $rc -ne 0 ]; then - return $rc - fi + # debug + # ocf_log info "$active" - if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then - ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" - return 1 - fi - return 0 + score=`expr $active \* $OCF_RESKEY_multiplier` + attrd_updater -n $OCF_RESKEY_name -v $score -d $OCF_RESKEY_dampen $attrd_options + rc=$? + case $rc in + 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; + *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; + esac + if [ $rc -ne 0 ]; then + return $rc + fi + if [ $score -eq 0 ]; then + ocf_log err "LNet connection failed please check" + fi + if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then + ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" + return 1 + fi + return 0 } : ${OCF_RESKEY_name:="pingd"} @@ -355,38 +359,38 @@ case ${OCF_RESKEY_timeout} in esac if [ -z ${OCF_RESKEY_timeout} ]; then - if [ x"$OCF_RESKEY_host_list" != x ]; then - host_count=`echo $OCF_RESKEY_host_list | awk '{print NF}'` - OCF_RESKEY_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts` - OCF_RESKEY_timeout=`expr $OCF_RESKEY_timeout / 1100` # Convert to seconds and finish 10% early - else - OCF_RESKEY_timeout=5 - fi + if [ x"$OCF_RESKEY_host_list" != x ]; then + host_count=`echo $OCF_RESKEY_host_list | awk '{print NF}'` + OCF_RESKEY_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / 
$host_count / $OCF_RESKEY_attempts` + OCF_RESKEY_timeout=`expr $OCF_RESKEY_timeout / 1100` # Convert to seconds and finish 10% early + else + OCF_RESKEY_timeout=5 + fi fi if [ ${OCF_RESKEY_timeout} -lt 1 ]; then - OCF_RESKEY_timeout=5 -elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then - # ping actually complains if this value is too high, 5 minutes is plenty - OCF_RESKEY_timeout=300 + OCF_RESKEY_timeout=5 + elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then + # ping actually complains if this value is too high, 5 minutes is plenty + OCF_RESKEY_timeout=300 fi if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then - : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESKEY_name}"} + : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESKEY_name}"} else - : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}"} + : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}"} fi attrd_options='-q' if ocf_is_true ${OCF_RESKEY_debug} ; then - attrd_options='' + attrd_options='' fi # Check the debug option case "${OCF_RESKEY_debug}" in - true|True|TRUE|1) OCF_RESKEY_debug=true;; - false|False|FALSE|0) OCF_RESKEY_debug=false;; - *) + true|True|TRUE|1) OCF_RESKEY_debug=true;; + false|False|FALSE|0) OCF_RESKEY_debug=false;; + *) ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}" OCF_RESKEY_debug=false ;; -- 1.8.3.1