From 443826218f0e19777dc93efc7064da2bb0eaa456 Mon Sep 17 00:00:00 2001 From: Olaf Faaland Date: Thu, 17 Sep 2015 12:10:50 -0700 Subject: [PATCH] LU-7179 scripts: init and ha script fixes 1) Because of a typo, Lustre.ha_v2 currently continues running after determining that a bad resource-name has been provided by the user. This commit fixes that typo so that die() is called when the target-name is bad. 2) When a relevant target is in recovery, lustre/scripts/lustre produces improper output when run in any of the following ways: /etc/init.d/lustre status /etc/init.d/lustre status local /etc/init.d/lustre status foreign A grep command in health_check() expects variables to contain the path to /proc files containing recovery status, but these variables' contents were altered in a prior commit: e3ddff LU-5030 utils: fix hard-coded /proc/fs/lustre in scripts This commit fixes health_check() to correctly report recovery by obtaining recovery status via lctl and checking that with grep. 
Signed-off-by: Olaf Faaland Change-Id: I25b8c0d82b637cf9d40feace7d8b964ffcd34251 Reviewed-on: http://review.whamcloud.com/16472 Tested-by: Jenkins Reviewed-by: Andreas Dilger Reviewed-by: Minh Diep Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/scripts/Lustre.ha_v2 | 2 +- lustre/scripts/lustre | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lustre/scripts/Lustre.ha_v2 b/lustre/scripts/Lustre.ha_v2 index 19c7875..fdfc0ef 100644 --- a/lustre/scripts/Lustre.ha_v2 +++ b/lustre/scripts/Lustre.ha_v2 @@ -46,7 +46,7 @@ if [ "`uname -n`-targets" == "$1" ]; then elif [ "`/usr/sbin/ldev -p`-targets" == "$1" ]; then service=foreign else - die: "bad service arg[1]: $*" + die "bad service arg[1]: $*" fi # Until multi-mount protect is implemented for ZFS we allow heartbeat to diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre index 1c27a0a..0f8f082 100644 --- a/lustre/scripts/lustre +++ b/lustre/scripts/lustre @@ -595,6 +595,7 @@ stop_lustre_services () # General lustre health check - not device specific. health_check () { + old_nullglob="`shopt -p nullglob`" shopt -u nullglob @@ -621,14 +622,14 @@ health_check () OST="" LLITE="" - VAR=$(lctl get_param -n mdt.*.recovery_status 2>&1) + VAR=$(lctl get_param -n mdt.*.recovery_status 2>&1 | grep '^status:' ) if [ $? = 0 ] ; then - MDT="YES" + MDT=$VAR fi - VAR=$(lctl get_param -n obdfilter.*.recovery_status 2>&1) + VAR=$(lctl get_param -n obdfilter.*.recovery_status 2>&1 | grep '^status:') if [ $? = 0 ] ; then - OST="YES" + OST=$VAR fi VAR=$(lctl get_param -n llite.fs* 2>&1) @@ -659,7 +660,7 @@ health_check () fi # check for servers in recovery - if [ -n "$MDT$OST" ] && grep -q RECOV $MDT $OST ; then + if [ -n "$MDT$OST" ] && echo $MDT $OST | grep -q RECOV ; then STATE="recovery" RETVAL=0 fi -- 1.8.3.1