From: Gabriele Paciucci Date: Mon, 8 Aug 2016 16:29:24 +0000 (+0100) Subject: LU-8458 pacemaker: Script to monitor Server status X-Git-Tag: 2.9.56~56 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=d35970fc24d730dab28f28990875fc6244fe116c;p=fs%2Flustre-release.git LU-8458 pacemaker: Script to monitor Server status A new script to be used in Pacemaker to monitor the Lustre Servers status compatible with ZFS and LDISKFS based Lustre server installations. This RA is able to monitor a Lustre Server using Pacemaker's clone technology. pcs resource create [Resource Name] ocf:lustre:healthLUSTRE dampen=[seconds 5s] --clone where: * dampen The time to wait (dampening) for further changes to occur This script should be located in /usr/lib/ocf/resource.d/lustre/ of both the Lustre servers with permission 755. Test-Parameters: trivial Signed-off-by: Gabriele Paciucci Change-Id: Ibfbad748e8c1b0c7faecc91984def87002070033 Reviewed-on: https://review.whamcloud.com/22297 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Nathaniel Clark Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/contrib/scripts/pacemaker/healthLUSTRE b/contrib/scripts/pacemaker/healthLUSTRE new file mode 100755 index 0000000..07e0bad --- /dev/null +++ b/contrib/scripts/pacemaker/healthLUSTRE @@ -0,0 +1,220 @@ +#!/bin/sh +# +# +# HealthLUSTRE OCF RA +# + +# License: GNU General Public License (GPL)v2 +# Description: Manages ZFS and Lustre on a shared storage +# Written by: Gabriele Paciucci +# Release Date: 01 November 2016 +# Release Version: 0.99.3 +# Copyright (c) 2009 Andrew Beekhof +# Copyright (c) 2016, Intel Corporation + +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. 
+# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program. +# If not, see +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} +. ${OCF_FUNCTIONS} +: ${__OCF_ACTION=$1} + +####################################################################### + +meta_data() { + cat < + + +0.99.3 + + +Every time the monitor action is run, this resource agent +records (in the CIB) the current number of healthy lustre server + +lustre servers healthy + + + + +PID file +PID file + + + + + +The time to wait (dampening) further changes occur + +Dampening interval + + + + + + +The name of the attributes to set. +This is the name to be used in the constraints. + +Attribute name + + + + + + + +Enables to use default attrd_updater verbose logging on every call. + +Verbose logging + + + + + + + + + + + + + + +END +} + +####################################################################### + +lustre_conditional_log() { + level=$1; shift + if [ ${OCF_RESKEY_debug} = "true" ]; then + ocf_log $level "$*" + fi +} + +lustre_usage() { + cat <&1`; rc=$? 
+
+    # Tail of lustre_check: rc is the exit status captured just above.
+    # The value returned here is what lustre_update publishes.
+    case $rc in
+    0) active=`expr $active + 1`;;
+    1) lustre_conditional_log warn "Lustre is not healthy: $l_out";;
+    *) ocf_log err "Unexpected result for '/proc/fs/lustre/health_check' $rc: $l_out";;
+    esac
+    return $active
+}
+
+# lustre_update: run the health check and publish its result as the node
+# attribute $OCF_RESKEY_name through attrd_updater.
+#
+# Reads:   OCF_RESKEY_name, OCF_RESKEY_dampen, attrd_options
+# Returns: 0 when the attribute was updated, otherwise the non-zero exit
+#          status of attrd_updater.
+lustre_update() {
+    lustre_check
+    active=$?
+
+    # attrd_options is deliberately left unquoted: it is either '-q' or
+    # empty, and an empty value must expand to no argument at all.
+    attrd_updater -n "$OCF_RESKEY_name" -v "$active" -d "$OCF_RESKEY_dampen" $attrd_options
+    rc=$?
+    case $rc in
+    0) lustre_conditional_log debug "Updated $OCF_RESKEY_name = $active" ;;
+    *) ocf_log warn "Could not update $OCF_RESKEY_name = $active: rc=$rc";;
+    esac
+    return $rc
+}
+
+
+# Parameter defaults. They must be assigned before anything below reads
+# them: the default pidfile path embeds the attribute name and
+# attrd_options depends on the debug flag.
+: ${OCF_RESKEY_name:="lustred"}
+: ${OCF_RESKEY_debug:="false"}
+# Without a dampen value the "-d" option of attrd_updater would consume the
+# following option as its argument.
+# NOTE(review): 5s mirrors the documented usage "dampen=[seconds 5s]";
+# confirm it matches the default advertised in the metadata.
+: ${OCF_RESKEY_dampen:="5s"}
+
+# Load Lustre when the health file is missing, but never for the purely
+# informational actions, which must not change the state of the node.
+case $__OCF_ACTION in
+meta-data|validate-all|usage|help)
+    ;;
+*)
+    if [ ! -f /proc/fs/lustre/health_check ]; then
+        ocf_log warn "Attention Health_Check file doesn't exist. Lustre will be loaded"
+        modprobe lustre ||
+            ocf_log err "Could not load the lustre kernel module"
+    fi
+    ;;
+esac
+
+
+# Quoted so that an unset variable compares as an empty string instead of
+# making test(1) fail with a syntax error.
+if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then
+    : ${OCF_RESKEY_pidfile:="$HA_VARRUN/healthLUSTRE-${OCF_RESKEY_name}"}
+else
+    : ${OCF_RESKEY_pidfile:="$HA_VARRUN/healthLUSTRE-${OCF_RESOURCE_INSTANCE}"}
+fi
+
+# attrd_updater runs quietly unless debug logging was requested.
+attrd_options='-q'
+if ocf_is_true "${OCF_RESKEY_debug}" ; then
+    attrd_options=''
+fi
+
+# Dispatch the requested OCF action.
+case $__OCF_ACTION in
+meta-data)      meta_data
+                exit $OCF_SUCCESS
+                ;;
+start)          lustre_start;;
+stop)           lustre_stop;;
+monitor)        lustre_monitor;;
+reload)         lustre_start;;
+validate-all)   lustre_usage
+                exit $OCF_SUCCESS
+                ;;
+usage|help)     lustre_usage
+                exit $OCF_SUCCESS
+                ;;
+*)              lustre_usage
+                exit $OCF_ERR_UNIMPLEMENTED
+                ;;
+esac