From: adilger
Date: Tue, 12 Apr 2005 09:19:27 +0000 (+0000)
Subject: Branch: b1_4
X-Git-Tag: v1_8_0_110~486^7~27
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=aada8bf2ee74c0c8110d4854d51beccfdcdbcff3;p=fs%2Flustre-release.git

Branch: b1_4
Fix somewhat broken "lustre status" output, which previously always output
"running" or "recovery" then "stopped". Now it checks for servers and clients
in recovery and also if modules are loaded (which can be a sign of problems).
It will check status regardless of whether there is a config.xml file as not
all sites are configured that way.
---

diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre
index 4b58c07..b6398c1 100755
--- a/lustre/scripts/lustre
+++ b/lustre/scripts/lustre
@@ -43,54 +43,58 @@ if [ -f /etc/sysconfig/network ] ; then
 	. /etc/sysconfig/network
 fi
 
-# Check that networking is up.
-[ "${NETWORKING}" = "no" ] && exit 0
+check_start_stop() {
+	# Check that networking is up.
+	[ "${NETWORKING}" = "no" ] && exit 0
 
-[ -x ${LCONF} -a -x ${LCTL} ] || exit 0
+	[ -x ${LCONF} -a -x ${LCTL} ] || exit 0
 
-[ -f ${LUSTRE_CONFIG_XML} ] || ( echo "unconfigured" && exit 0 )
+	[ -f ${LUSTRE_CONFIG_XML} ] || ( echo "unconfigured" && exit 0 )
 
-# Create /var/lustre directory
-# This is used by snmp agent for checking lustre services \
-# status online/offline/online pending/offline pending.
+	# Create /var/lustre directory
+	# This is used by snmp agent for checking lustre services
+	# status online/offline/online pending/offline pending.
 
-[ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
+	[ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
+}
 STATUS=${STATUS_DIR}/sysStatus
 
 start() {
-        echo -n "Starting $SERVICE: "
+	check_start_stop
+	echo -n "Starting $SERVICE: "
 	if [ $UID -ne 0 ]; then
 		echo "Lustre should be started as root"
 		RETVAL=1
 		return
 	fi
-        ${LCONF} ${LCONF_START_ARGS}
-        RETVAL=$?
-        echo $SERVICE
-        if [ $RETVAL -eq 0 ]; then
-                touch $LOCK
-                echo "online" >$STATUS
-        else
-                echo "online pending" >$STATUS
-        fi
+	${LCONF} ${LCONF_START_ARGS}
+	RETVAL=$?
+	echo $SERVICE
+	if [ $RETVAL -eq 0 ]; then
+		touch $LOCK
+		echo "online" >$STATUS
+	else
+		echo "online pending" >$STATUS
+	fi
 }
 
 stop() {
-        echo -n "Shutting down $SERVICE: "
+	check_start_stop
+	echo -n "Shutting down $SERVICE: "
 	if [ $UID -ne 0 ]; then
 		echo "Lustre should be stopped as root"
 		RETVAL=1
 		return
 	fi
-        ${LCONF} ${LCONF_STOP_ARGS}
-        RETVAL=$?
-        echo $SERVICE
-        rm -f $LOCK
-        if [ $RETVAL -eq 0 ]; then
-                echo "offline" >$STATUS
-        else
-                echo "offline pending" >$STATUS
-        fi
+	${LCONF} ${LCONF_STOP_ARGS}
+	RETVAL=$?
+	echo $SERVICE
+	rm -f $LOCK
+	if [ $RETVAL -eq 0 ]; then
+		echo "offline" >$STATUS
+	else
+		echo "offline pending" >$STATUS
+	fi
 }
 
 restart() {
@@ -99,16 +103,31 @@ restart() {
 }
 
 status() {
-	${LCTL} dl 2>/dev/null | while read INDEX STAT MODULE NAME; do
-		case $MODULE in
-		ost|mds|osc|mdc)
-			[ "`grep -v FULL /proc/fs/lustre/*c/*/*_server_uuid`" ] \
-				&& echo "recovery" || echo "running"
-			return
-			;;
-		esac
-	done
-	echo "stopped"
+	STATE="stopped"
+	egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
+
+	# check for any routes - on a portals router this is the only thing
+	[ "`cat /proc/sys/portals/routes 2> /dev/null`" ] && STATE="running"
+
+	# check for any configured devices (may indicate partial startup)
+	[ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial"
+
+	# check for either a server or a client filesystem
+	MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
+	OST="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
+	LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
+	[ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running"
+
+	# check for server disconnections
+	DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
+	[ "$DISCON" ] && STATE="disconnected"
+
+	# check for servers in recovery
+	[ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery"
+
+	[ "`dmesg | grep LBUG`" ] && STATE="LBUG"
+
+	echo $STATE
 }
 
 # See how we were called.
diff --git a/lustre/scripts/lustrefs b/lustre/scripts/lustrefs
index 78b1155..af2e675 100644
--- a/lustre/scripts/lustrefs
+++ b/lustre/scripts/lustrefs
@@ -62,7 +62,7 @@ lustre_action () {
 }
 
 LUSTREFSTAB=`LC_ALL=C awk '!/^#/ && $3 == "lustre" && $4 !~ /noauto/ { print $2 }' /etc/fstab`
-LUSTREMTAB=`LC_ALL=C awk '!/^#/ && $3 == "lustre" { print $2 }' /proc/mounts`
+LUSTREMTAB=`LC_ALL=C awk '!/^#/ && ($3 ~ "lustre") { print $2 }' /proc/mounts`
 
 # See how we were called.
 case "$1" in