--- /dev/null
+#!/bin/sh
+#
+#
+# LNet OCF RA
+#
+
+# License: GNU General Public License (GPL)v2
+# Description: Manages ZFS and Lustre on a shared storage
+# Written by: Gabriele Paciucci
+# Release Date: 01 September 2016
+# Release Version: 0.99
+
+# Copyright (c) 2009 Andrew Beekhof
+# Copyright (c) 2016, Intel Corporation
+
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
+. ${OCF_FUNCTIONS}
+: ${__OCF_ACTION=$1}
+
+#######################################################################
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="healthLNET">
+<version>0.99</version>
+
+<longdesc lang="en">
+Every time the monitor action is run, this resource agent records (in the CIB)
+the current number of lctl ping nodes the host can connect to.
+</longdesc>
+<shortdesc lang="en">LNet connectivity</shortdesc>
+
+<parameters>
+
+<parameter name="pidfile" unique="0">
+<longdesc lang="en">PID file</longdesc>
+<shortdesc lang="en">PID file</shortdesc>
+<content type="string" default="$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}" />
+</parameter>
+
+<parameter name="dampen" unique="0">
+<longdesc lang="en">
+The time to wait (dampening) further changes occur
+</longdesc>
+<shortdesc lang="en">Dampening interval</shortdesc>
+<content type="integer" default="5s"/>
+</parameter>
+
+<parameter name="name" unique="0">
+<longdesc lang="en">
+The name of the attributes to set. This is the name to be used in the constraints.
+</longdesc>
+<shortdesc lang="en">Attribute name</shortdesc>
+<content type="string" default="pingd"/>
+</parameter>
+
+<parameter name="multiplier" unique="0">
+<longdesc lang="en">
+The number by which to multiply the number of connected ping nodes by
+</longdesc>
+<shortdesc lang="en">Value multiplier</shortdesc>
+<content type="integer" default=""/>
+</parameter>
+
+<parameter name="host_list" unique="0" required="1">
+<longdesc lang="en">
+The list of ping nodes to count.
+</longdesc>
+<shortdesc lang="en">Host list</shortdesc>
+<content type="string" default=""/>
+</parameter>
+
+<parameter name="attempts" unique="0">
+<longdesc lang="en">
+Number of ping attempts, per host, before declaring it dead
+</longdesc>
+<shortdesc lang="en">no. of ping attempts</shortdesc>
+<content type="integer" default="2"/>
+</parameter>
+
+<parameter name="timeout" unique="0">
+<longdesc lang="en">
+How long, in seconds, to wait before declaring a ping lost
+</longdesc>
+<shortdesc lang="en">ping timeout in seconds</shortdesc>
+<content type="integer" default="2"/>
+</parameter>
+
+<parameter name="lctl" unique="0">
+<longdesc lang="en">
+Option to enable lctl ping. The default is true
+</longdesc>
+<shortdesc lang="en">Extra Options</shortdesc>
+<content type="string" default="true"/>
+</parameter>
+
+<parameter name="device" unique="0">
+<longdesc lang="en">
+Device used for the LNET network. We assume the same device accross the cluster
+</longdesc>
+<shortdesc lang="en">LNET device</shortdesc>
+<content type="string" default=""/>
+</parameter>
+
+
+<parameter name="options" unique="0">
+<longdesc lang="en">
+A catch all for any other options that need to be passed to ping.
+</longdesc>
+<shortdesc lang="en">Extra Options</shortdesc>
+<content type="string" default=""/>
+</parameter>
+
+<parameter name="failure_score" unique="0">
+<longdesc lang="en">
+Resource is failed if the score is less than failure_score.
+Default never fails.
+</longdesc>
+<shortdesc lang="en">failure_score</shortdesc>
+<content type="integer" default=""/>
+</parameter>
+
+<parameter name="debug" unique="0">
+<longdesc lang="en">
+Enables to use default attrd_updater verbose logging on every call.
+</longdesc>
+<shortdesc lang="en">Verbose logging</shortdesc>
+<content type="string" default="false"/>
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="300s" />
+<action name="stop" timeout="300s" />
+<action name="reload" timeout="300s" />
+<action name="monitor" depth="0" timeout="300s" interval="20s"/>
+<action name="meta-data" timeout="5" />
+<action name="validate-all" timeout="30" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+ping_conditional_log() {
+ level=$1; shift
+ if [ ${OCF_RESKEY_debug} = "true" ]; then
+ ocf_log $level "$*"
+ fi
+}
+
+ping_usage() {
+ cat <<END
+ usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
+
+ Expects to have a fully populated OCF RA-compliant environment set.
+ END
+}
+
+ping_start() {
+ ping_monitor
+ if [ $? = $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ fi
+ touch ${OCF_RESKEY_pidfile}
+ ping_update
+}
+
+ping_stop() {
+
+ rm -f ${OCF_RESKEY_pidfile}
+ attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen $attrd_options
+ return $OCF_SUCCESS
+}
+
+ping_monitor() {
+ if [ -f ${OCF_RESKEY_pidfile} ]; then
+ ping_update
+ if [ $? -eq 0 ]; then
+ return $OCF_SUCCESS
+ fi
+ return $OCF_ERR_GENERIC
+ fi
+ return $OCF_NOT_RUNNING
+}
+
+ping_validate() {
+ # Is the state directory writable?
+ state_dir=`dirname "$OCF_RESKEY_pidfile"`
+ touch "$state_dir/$$"
+ if [ $? != 0 ]; then
+ ocf_log err "Invalid location for 'state': $state_dir is not writable"
+ return $OCF_ERR_ARGS
+ fi
+ rm "$state_dir/$$"
+
+# Pidfile better be an absolute path
+ case $OCF_RESKEY_pidfile in
+ /*) ;;
+ *) ocf_log warn "You should use an absolute path for pidfile not: $OCF_RESKEY_pidfile" ;;
+ esac
+
+# Check the host list
+ if [ "x" = "x$OCF_RESKEY_host_list" ]; then
+ ocf_log err "Empty host_list. Please specify some nodes to ping"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ check_binary ping
+
+ return $OCF_SUCCESS
+}
+
+lctl_check() {
+ active=0
+ for host in $OCF_RESKEY_host_list; do
+ lctl_exe="lctl ping"
+
+ lctl_out=`$lctl_exe $host $OCF_RESKEY_timeout 2>&1`; rc=$?
+ # debug
+ # ocf_log info "$lctl_exe $host $OCF_RESKEY_timeout"
+
+ case $rc in
+ 0) active=`expr $active + 1`;;
+ 1) ping_conditional_log warn "$host is inactive: $lctl_out";;
+ *) ocf_log err "Unexpected result for '$lctl_exe $host $OCF_RESKEY_timeout' $rc: $p_out";;
+ esac
+ done
+ return $active
+
+
+}
+
+
+
+ping_check() {
+ active=0
+ for host in $OCF_RESKEY_host_list; do
+ p_exe=ping
+
+ case `uname` in
+ Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";;
+ Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";;
+ *) ocf_log err "Unknown host type: `uname`"; exit $OCF_ERR_INSTALLED;;
+ esac
+
+ case $host in
+ *:*) p_exe=ping6
+ esac
+
+ p_out=`$p_exe $p_args $OCF_RESKEY_options $host 2>&1`; rc=$?
+
+ case $rc in
+ 0) active=`expr $active + 1`;;
+ 1) ping_conditional_log warn "$host is inactive: $p_out";;
+ *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";;
+ esac
+ done
+ return $active
+}
+
+ping_update() {
+ # first I'm testing if I have the physical link up.
+ # If not I give up without any additional tests.
+ # but first we need to find which is the device we are using on the localhost.
+
+ CARRIER=/sys/class/net/$OCF_RESKEY_device/carrier
+ OPERSTATE=/sys/class/net/$OCF_RESKEY_device/operstate
+
+ CAR_STAT=$(cat $CARRIER)
+ OPER_STAT=$(cat $OPERSTATE)
+
+ # debug
+ # ocf_log info "$CAR_STAT - $OPER_STAT"
+
+ if [ "$CAR_STAT" == "1" ] && [ "$OPER_STAT" == "up" ]; then
+ if [ ${OCF_RESKEY_lctl} = "true" ]; then
+ lctl_check
+ active=$?
+ else
+ ping_check
+ active=$?
+ fi
+ else
+ active=0
+ fi
+
+ # debug
+ # ocf_log info "$active"
+
+ score=`expr $active \* $OCF_RESKEY_multiplier`
+ attrd_updater -n $OCF_RESKEY_name -v $score -d $OCF_RESKEY_dampen $attrd_options
+ rc=$?
+ case $rc in
+ 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;;
+ *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";;
+ esac
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+
+ if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then
+ ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)"
+ return 1
+ fi
+ return 0
+}
+
+: ${OCF_RESKEY_name:="pingd"}
+: ${OCF_RESKEY_dampen:="5s"}
+: ${OCF_RESKEY_attempts:="3"}
+: ${OCF_RESKEY_multiplier:="1"}
+: ${OCF_RESKEY_debug:="false"}
+: ${OCF_RESKEY_lctl:="true"}
+#: ${OCF_RESKEY_device:="eth1"}
+: ${OCF_RESKEY_failure_score:="0"}
+
+: ${OCF_RESKEY_CRM_meta_timeout:="20000"}
+: ${OCF_RESKEY_CRM_meta_globally_unique:="true"}
+
+integer=`echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*'`
+case ${OCF_RESKEY_timeout} in
+ *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=`expr $integer / 1000`;;
+ *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=`expr $integer \* 60`;;
+ *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=`expr $integer \* 60 \* 60`;;
+ *) OCF_RESKEY_timeout=$integer;;
+esac
+
+if [ -z ${OCF_RESKEY_timeout} ]; then
+ if [ x"$OCF_RESKEY_host_list" != x ]; then
+ host_count=`echo $OCF_RESKEY_host_list | awk '{print NF}'`
+ OCF_RESKEY_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts`
+ OCF_RESKEY_timeout=`expr $OCF_RESKEY_timeout / 1100` # Convert to seconds and finish 10% early
+ else
+ OCF_RESKEY_timeout=5
+ fi
+fi
+
+if [ ${OCF_RESKEY_timeout} -lt 1 ]; then
+ OCF_RESKEY_timeout=5
+elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then
+ # ping actually complains if this value is too high, 5 minutes is plenty
+ OCF_RESKEY_timeout=300
+fi
+
+if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
+ : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESKEY_name}"}
+else
+ : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}"}
+fi
+
+attrd_options='-q'
+if ocf_is_true ${OCF_RESKEY_debug} ; then
+ attrd_options=''
+fi
+
+# Check the debug option
+case "${OCF_RESKEY_debug}" in
+ true|True|TRUE|1) OCF_RESKEY_debug=true;;
+ false|False|FALSE|0) OCF_RESKEY_debug=false;;
+ *)
+ ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}"
+ OCF_RESKEY_debug=false
+ ;;
+esac
+
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS
+ ;;
+start) ping_start;;
+stop) ping_stop;;
+monitor) ping_monitor;;
+reload) ping_start;;
+validate-all) ping_validate;;
+usage|help) ping_usage
+ exit $OCF_SUCCESS
+ ;;
+*) ping_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac