#!/bin/sh # # # LNet OCF RA # # License: GNU General Public License (GPL)v2 # Description: Manages ZFS and Lustre on a shared storage # Written by: Gabriele Paciucci # Release Date: 01 September 2016 # Release Version: 0.99 # Copyright (c) 2009 Andrew Beekhof # Copyright (c) 2016, Intel Corporation # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < 0.99 Every time the monitor action is run, this resource agent records (in the CIB) the current number of lctl ping nodes the host can connect to. LNet connectivity PID file PID file The time to wait (dampening) further changes occur Dampening interval The name of the attributes to set. This is the name to be used in the constraints. Attribute name The number by which to multiply the number of connected ping nodes by Value multiplier The list of ping nodes to count. Host list Number of ping attempts, per host, before declaring it dead no. of ping attempts How long, in seconds, to wait before declaring a ping lost ping timeout in seconds Option to enable lctl ping. The default is true Extra Options Device used for the LNET network. We assume the same device accross the cluster LNET device A catch all for any other options that need to be passed to ping. Extra Options Resource is failed if the score is less than failure_score. Default never fails. failure_score Enables to use default attrd_updater verbose logging on every call. Verbose logging END } ####################################################################### ping_conditional_log() { level=$1; shift if [ ${OCF_RESKEY_debug} = "true" ]; then ocf_log $level "$*" fi } ping_usage() { cat <&1`; rc=$? # debug # ocf_log info "$lctl_exe $host $OCF_RESKEY_timeout" case $rc in 0) active=`expr $active + 1`;; 1) ping_conditional_log warn "$host is inactive: $lctl_out";; *) ocf_log err "Unexpected result for '$lctl_exe $host $OCF_RESKEY_timeout' $rc: $p_out";; esac done return $active } ping_check() { active=0 for host in $OCF_RESKEY_host_list; do p_exe=ping case `uname` in Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";; Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; *) ocf_log err "Unknown host type: `uname`"; exit $OCF_ERR_INSTALLED;; esac case $host in *:*) p_exe=ping6 esac p_out=`$p_exe $p_args $OCF_RESKEY_options $host 2>&1`; rc=$? case $rc in 0) active=`expr $active + 1`;; 1) ping_conditional_log warn "$host is inactive: $p_out";; *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";; esac done return $active } ping_update() { # first I'm testing if I have the physical link up. # If not I give up without any additional tests. # but first we need to find which is the device we are using on the localhost. CARRIER=/sys/class/net/$OCF_RESKEY_device/carrier OPERSTATE=/sys/class/net/$OCF_RESKEY_device/operstate CAR_STAT=$(cat $CARRIER) OPER_STAT=$(cat $OPERSTATE) # debug # ocf_log info "$CAR_STAT - $OPER_STAT" if [ "$CAR_STAT" == "1" ] && [ "$OPER_STAT" == "up" ]; then if [ ${OCF_RESKEY_lctl} = "true" ]; then lctl_check active=$? else ping_check active=$? fi else active=0 fi # debug # ocf_log info "$active" score=`expr $active \* $OCF_RESKEY_multiplier` attrd_updater -n $OCF_RESKEY_name -v $score -d $OCF_RESKEY_dampen $attrd_options rc=$? case $rc in 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; esac if [ $rc -ne 0 ]; then return $rc fi if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" return 1 fi return 0 } : ${OCF_RESKEY_name:="pingd"} : ${OCF_RESKEY_dampen:="5s"} : ${OCF_RESKEY_attempts:="3"} : ${OCF_RESKEY_multiplier:="1"} : ${OCF_RESKEY_debug:="false"} : ${OCF_RESKEY_lctl:="true"} #: ${OCF_RESKEY_device:="eth1"} : ${OCF_RESKEY_failure_score:="0"} : ${OCF_RESKEY_CRM_meta_timeout:="20000"} : ${OCF_RESKEY_CRM_meta_globally_unique:="true"} integer=`echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*'` case ${OCF_RESKEY_timeout} in *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=`expr $integer / 1000`;; *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=`expr $integer \* 60`;; *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=`expr $integer \* 60 \* 60`;; *) OCF_RESKEY_timeout=$integer;; esac if [ -z ${OCF_RESKEY_timeout} ]; then if [ x"$OCF_RESKEY_host_list" != x ]; then host_count=`echo $OCF_RESKEY_host_list | awk '{print NF}'` OCF_RESKEY_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts` OCF_RESKEY_timeout=`expr $OCF_RESKEY_timeout / 1100` # Convert to seconds and finish 10% early else OCF_RESKEY_timeout=5 fi fi if [ ${OCF_RESKEY_timeout} -lt 1 ]; then OCF_RESKEY_timeout=5 elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then # ping actually complains if this value is too high, 5 minutes is plenty OCF_RESKEY_timeout=300 fi if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESKEY_name}"} else : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}"} fi attrd_options='-q' if ocf_is_true ${OCF_RESKEY_debug} ; then attrd_options='' fi # Check the debug option case "${OCF_RESKEY_debug}" in true|True|TRUE|1) OCF_RESKEY_debug=true;; false|False|FALSE|0) OCF_RESKEY_debug=false;; *) ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}" OCF_RESKEY_debug=false ;; esac case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ping_start;; stop) ping_stop;; monitor) ping_monitor;; reload) ping_start;; validate-all) ping_validate;; usage|help) ping_usage exit $OCF_SUCCESS ;; *) ping_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac