7 # License: GNU General Public License (GPL)v2
8 # Description: Manages ZFS and Lustre on a shared storage
9 # Written by: Gabriele Paciucci
10 # Release Date: 01 September 2016
11 # Release Version: 0.99
13 # Copyright (c) 2009 Andrew Beekhof
14 # Copyright (c) 2016, Intel Corporation
17 # This program is free software; you can redistribute it and/or modify
18 # it under the terms of version 2 of the GNU General Public License as
19 # published by the Free Software Foundation.
21 # This program is distributed in the hope that it would be useful, but
22 # WITHOUT ANY WARRANTY; without even the implied warranty of
23 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25 # Further, this software is distributed without any warranty that it is
26 # free of the rightful claim of any third person regarding infringement
27 # or the like. Any license provided herein, whether implied or
28 # otherwise, applies only to this software file. Patent licenses, if
29 # any, provided herein do not apply to combinations of this program with
30 # other software, or any other product whatsoever.
32 # You should have received a copy of the GNU General Public License
33 # along with this program; if not, write the Free Software Foundation,
34 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
37 #######################################################################
40 : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
44 #######################################################################
49 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
50 <resource-agent name="healthLNET">
51 <version>0.99</version>
54 Every time the monitor action is run, this resource agent records (in the CIB)
55 the current number of lctl ping nodes the host can connect to.
57 <shortdesc lang="en">LNet connectivity</shortdesc>
61 <parameter name="pidfile" unique="0">
62 <longdesc lang="en">PID file</longdesc>
63 <shortdesc lang="en">PID file</shortdesc>
64 <content type="string" default="$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}" />
67 <parameter name="dampen" unique="0">
69 The time to wait (dampening) for further changes to occur
71 <shortdesc lang="en">Dampening interval</shortdesc>
72 <content type="integer" default="5s"/>
75 <parameter name="name" unique="0">
77 The name of the attributes to set. This is the name to be used in the constraints.
79 <shortdesc lang="en">Attribute name</shortdesc>
80 <content type="string" default="pingd"/>
83 <parameter name="multiplier" unique="0">
85 The number by which to multiply the number of connected ping nodes
87 <shortdesc lang="en">Value multiplier</shortdesc>
88 <content type="integer" default=""/>
91 <parameter name="host_list" unique="0" required="1">
93 The list of ping nodes to count.
95 <shortdesc lang="en">Host list</shortdesc>
96 <content type="string" default=""/>
99 <parameter name="attempts" unique="0">
101 Number of ping attempts, per host, before declaring it dead
103 <shortdesc lang="en">no. of ping attempts</shortdesc>
104 <content type="integer" default="2"/>
107 <parameter name="timeout" unique="0">
109 How long, in seconds, to wait before declaring a ping lost
111 <shortdesc lang="en">ping timeout in seconds</shortdesc>
112 <content type="integer" default="2"/>
115 <parameter name="lctl" unique="0">
117 Option to enable lctl ping. The default is true
119 <shortdesc lang="en">Enable lctl ping</shortdesc>
120 <content type="string" default="true"/>
123 <parameter name="device" unique="0">
125 Device used for the LNET network. We assume the same device across the cluster
127 <shortdesc lang="en">LNET device</shortdesc>
128 <content type="string" default=""/>
132 <parameter name="options" unique="0">
134 A catch all for any other options that need to be passed to ping.
136 <shortdesc lang="en">Extra Options</shortdesc>
137 <content type="string" default=""/>
140 <parameter name="failure_score" unique="0">
142 Resource is failed if the score is less than failure_score.
145 <shortdesc lang="en">failure_score</shortdesc>
146 <content type="integer" default=""/>
149 <parameter name="debug" unique="0">
151 Enables the default attrd_updater verbose logging on every call.
153 <shortdesc lang="en">Verbose logging</shortdesc>
154 <content type="string" default="false"/>
160 <action name="start" timeout="300s" />
161 <action name="stop" timeout="300s" />
162 <action name="reload" timeout="300s" />
163 <action name="monitor" depth="0" timeout="300s" interval="20s"/>
164 <action name="meta-data" timeout="5" />
165 <action name="validate-all" timeout="30" />
171 #######################################################################
173 ping_conditional_log() {
175 if [ ${OCF_RESKEY_debug} = "true" ]; then
182 usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
184 Expects to have a fully populated OCF RA-compliant environment set.
190 if [ $? = $OCF_SUCCESS ]; then
193 touch ${OCF_RESKEY_pidfile}
199 rm -f ${OCF_RESKEY_pidfile}
200 attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen $attrd_options
205 if [ -f ${OCF_RESKEY_pidfile} ]; then
207 if [ $? -eq 0 ]; then
210 return $OCF_ERR_GENERIC
212 return $OCF_NOT_RUNNING
216 # Is the state directory writable?
217 state_dir=`dirname "$OCF_RESKEY_pidfile"`
218 touch "$state_dir/$$"
220 ocf_log err "Invalid location for 'state': $state_dir is not writable"
225 # Pidfile better be an absolute path
226 case $OCF_RESKEY_pidfile in
228 *) ocf_log warn "You should use an absolute path for pidfile not: $OCF_RESKEY_pidfile" ;;
231 # Check the host list
232 if [ "x" = "x$OCF_RESKEY_host_list" ]; then
233 ocf_log err "Empty host_list. Please specify some nodes to ping"
234 exit $OCF_ERR_CONFIGURED
244 for host in $OCF_RESKEY_host_list; do
247 lctl_out=`$lctl_exe $host $OCF_RESKEY_timeout 2>&1`; rc=$?
249 # ocf_log info "$lctl_exe $host $OCF_RESKEY_timeout"
252 0) active=`expr $active + 1`;;
253 1) ping_conditional_log warn "$host is inactive: $lctl_out";;
254 *) ocf_log err "Unexpected result for '$lctl_exe $host $OCF_RESKEY_timeout' $rc: $p_out";;
266 for host in $OCF_RESKEY_host_list; do
270 Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";;
271 Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";;
272 *) ocf_log err "Unknown host type: `uname`"; exit $OCF_ERR_INSTALLED;;
279 p_out=`$p_exe $p_args $OCF_RESKEY_options $host 2>&1`; rc=$?
282 0) active=`expr $active + 1`;;
283 1) ping_conditional_log warn "$host is inactive: $p_out";;
284 *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";;
291 # First, check whether the physical link is up.
292 # If it is not, give up without running any additional tests.
293 # Before that, we need to determine which device is in use on the local host.
295 CARRIER=/sys/class/net/$OCF_RESKEY_device/carrier
296 OPERSTATE=/sys/class/net/$OCF_RESKEY_device/operstate
298 CAR_STAT=$(cat $CARRIER)
299 OPER_STAT=$(cat $OPERSTATE)
302 # ocf_log info "$CAR_STAT - $OPER_STAT"
304 if [ "$CAR_STAT" == "1" ] && [ "$OPER_STAT" == "up" ]; then
305 if [ ${OCF_RESKEY_lctl} = "true" ]; then
317 # ocf_log info "$active"
319 score=`expr $active \* $OCF_RESKEY_multiplier`
320 attrd_updater -n $OCF_RESKEY_name -v $score -d $OCF_RESKEY_dampen $attrd_options
323 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;;
324 *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";;
326 if [ $rc -ne 0 ]; then
330 if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then
331 ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)"
# Fallback values for the resource parameters. The ':' no-op combined with
# the ':=' expansion assigns the default only when the variable is unset or
# empty, so values supplied through the environment always win.
337 : ${OCF_RESKEY_name:="pingd"}
338 : ${OCF_RESKEY_dampen:="5s"}
# NOTE(review): the meta-data block advertises default="2" for 'attempts',
# while 3 is what is applied here - confirm which value is intended.
339 : ${OCF_RESKEY_attempts:="3"}
# NOTE(review): the meta-data block advertises an empty default for
# 'multiplier'; the effective default applied here is 1.
340 : ${OCF_RESKEY_multiplier:="1"}
341 : ${OCF_RESKEY_debug:="false"}
342 : ${OCF_RESKEY_lctl:="true"}
# The 'device' default below is deliberately left disabled, so
# OCF_RESKEY_device keeps whatever value (possibly none) the environment
# provides.
343 #: ${OCF_RESKEY_device:="eth1"}
344 : ${OCF_RESKEY_failure_score:="0"}
# Operation timeout used further down to derive a per-host ping timeout when
# 'timeout' is not set; presumably expressed in milliseconds, since that
# later computation divides by 1100 to obtain seconds - TODO confirm.
346 : ${OCF_RESKEY_CRM_meta_timeout:="20000"}
# Compared against "false" further down when choosing the default pidfile
# name.
347 : ${OCF_RESKEY_CRM_meta_globally_unique:="true"}
349 integer=`echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*'`
350 case ${OCF_RESKEY_timeout} in
351 *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=`expr $integer / 1000`;;
352 *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=`expr $integer \* 60`;;
353 *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=`expr $integer \* 60 \* 60`;;
354 *) OCF_RESKEY_timeout=$integer;;
357 if [ -z ${OCF_RESKEY_timeout} ]; then
358 if [ x"$OCF_RESKEY_host_list" != x ]; then
359 host_count=`echo $OCF_RESKEY_host_list | awk '{print NF}'`
360 OCF_RESKEY_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts`
361 OCF_RESKEY_timeout=`expr $OCF_RESKEY_timeout / 1100` # Convert to seconds and finish 10% early
367 if [ ${OCF_RESKEY_timeout} -lt 1 ]; then
369 elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then
370 # ping actually complains if this value is too high, 5 minutes is plenty
371 OCF_RESKEY_timeout=300
374 if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
375 : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESKEY_name}"}
377 : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}"}
381 if ocf_is_true ${OCF_RESKEY_debug} ; then
385 # Check the debug option
386 case "${OCF_RESKEY_debug}" in
387 true|True|TRUE|1) OCF_RESKEY_debug=true;;
388 false|False|FALSE|0) OCF_RESKEY_debug=false;;
390 ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}"
391 OCF_RESKEY_debug=false
395 case $__OCF_ACTION in
401 monitor) ping_monitor;;
403 validate-all) ping_validate;;
404 usage|help) ping_usage
408 exit $OCF_ERR_UNIMPLEMENTED