LU-9439 scripts: Provide a sample lnet.conf file
[fs/lustre-release.git] lustre/scripts/lustre
index 55c6601..919efb0 100644
@@ -75,7 +75,6 @@ start_zfs_services ()
 stop_devices ()
 {
        local labels=$*
-       local result=0
        local label devtype
        for label in $labels; do
                devtype=`$LDEV -t $label`
@@ -102,10 +101,6 @@ import_zpool ()
                args="$args -c $cache"
        elif [ -n "$ZPOOL_IMPORT_DIR" ] ; then
                args="$args -d $ZPOOL_IMPORT_DIR"
-       elif [ -d "/dev/disk/by-vdev" ] ; then
-               args="$args -d /dev/disk/by-vdev"
-       elif [ -d "/dev/mapper" ] ; then
-               args="$args -d /dev/mapper"
        fi
 
        if zpool status $pool >/dev/null 2>&1 ; then
@@ -114,6 +109,9 @@ import_zpool ()
                zpool import $pool $args 2>/dev/null
                result=$?
        fi
+       if [ $result -ne 0 ] ; then
+               echo "Unexpected return code from import of pool $pool: $result"
+       fi
        return $result
 }
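
Note on the import_zpool() hunks above: the automatic search-directory fallbacks to /dev/disk/by-vdev and /dev/mapper are removed, so "zpool import" is only given "-c <cachefile>" or "-d $ZPOOL_IMPORT_DIR", and a non-zero import status is now reported. A site that depended on the removed defaults can restore them by setting the variable itself. A minimal sketch, assuming the variable is exported in whatever environment file the init script reads (that file is an assumption, not part of this patch):

        # Search /dev/disk/by-vdev for pool members, as the dropped
        # fallback used to do; import_zpool() passes this via "-d".
        ZPOOL_IMPORT_DIR=/dev/disk/by-vdev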
 
@@ -321,7 +319,7 @@ device_is_active ()
 }
 
 # Usage: mount_one_device <label> <successflag> [devtype]
-# Remove <succesflag> on error (trick to detect errors after parallel runs).
+# Remove <successflag> on error (trick to detect errors after parallel runs).
 mount_one_device ()
 {
        local label=$1
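
The <successflag> mentioned in the comment is a file every parallel worker is expected to leave in place; a worker that fails removes it, so the parent only checks for the file once all children have finished. A minimal standalone sketch of that pattern; the names (flag, do_one, some_command) are illustrative and not taken from this script:

        #!/bin/bash
        # Remove-the-flag-on-error pattern for detecting failures
        # after a set of parallel workers has finished.
        flag=$(mktemp)

        do_one () {
                # real work goes here; on failure, drop the flag
                some_command "$1" || rm -f "$flag"
        }

        for item in a b c; do
                do_one "$item" &        # run the workers in parallel
        done
        wait                            # wait for all of them

        # if any worker failed, the flag file is gone
        [ -f "$flag" ] || echo "at least one worker failed"
        rm -f "$flag"
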
@@ -500,6 +498,7 @@ stop_services ()
 {
        local labels=$*
        local result=0
+       local pids=""
        local dir dev label
 
        for label in $labels; do
@@ -514,9 +513,22 @@ stop_services ()
                        # no error
                        continue
                fi
+
                echo "Unmounting $dir"
-               umount $dir || result=2
+               umount $dir &
+
+               if [ -z "$pids" ]; then
+                       pids="$!"
+               else
+                       pids="$pids $!"
+               fi
        done
+
+       # wait for all umount processes to complete, report any errors
+       for pid in $pids; do
+               wait $pid || result=2
+       done
+
        # double check!
        for label in $labels; do
                if mountpt_is_active $label; then
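
stop_services() now launches every umount in the background, records the PIDs, and reaps them with wait, so one slow target no longer serialises shutdown while any failed unmount still sets result=2 (wait returns the exit status of the child it is given). The same collect-and-wait pattern in isolation, with placeholder mount points:

        #!/bin/bash
        # Run several unmounts in parallel, then aggregate their status.
        result=0
        pids=""

        for dir in /mnt/ost0 /mnt/ost1; do      # placeholder paths
                umount "$dir" &
                pids="$pids $!"                 # remember each child PID
        done

        for pid in $pids; do
                wait "$pid" || result=2         # any failure -> result=2
        done
        exit $result
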
@@ -583,6 +595,7 @@ stop_lustre_services ()
 # General lustre health check - not device specific.
 health_check ()
 {
+
        old_nullglob="`shopt -p nullglob`"
        shopt -u nullglob
 
@@ -596,55 +609,74 @@ health_check ()
        egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
 
        # check for any configured devices (may indicate partial startup)
-       if [ -d /proc/fs/lustre ]; then
-               if [ -n "`cat /proc/fs/lustre/devices 2> /dev/null`" ] ; then
+       VAR=$(lctl get_param version 2>&1)
+       if [ $? = 0 ] ; then
+               VAR=$(lctl get_param -n devices 2>&1)
+               if [ $? = 0 ] ; then
                        STATE="partial"
                        RETVAL=150
                fi
 
                # check for either a server or a client filesystem
-               MDT="`ls /proc/fs/lustre/mdt/*/recovery_status 2> /dev/null`"
-               OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status \
-                       2> /dev/null`"
-               LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
-               if [ "$MDT" -o "$OST" -o "$LLITE" ]; then
+               local MGT=""
+               local MDT=""
+               local OST=""
+               local LLITE=""
+
+               ! lctl get_param -n mgs.MGS.* >/dev/null 2>&1 || MGT="YES"
+
+               VAR=$(lctl get_param -n mdt.*.recovery_status 2>&1 | grep '^status:'  )
+               if [ $? = 0 ] ; then
+                       MDT=$VAR
+               fi
+
+               VAR=$(lctl get_param -n obdfilter.*.recovery_status 2>&1 | grep '^status:')
+               if [ $? = 0 ] ; then
+                       OST=$VAR
+               fi
+
+               VAR=$(lctl get_param -n llite.fs* 2>&1)
+               if [ $? = 0 ] ; then
+                       LLITE="YES"
+               fi
+
+               if [ "$MGT" -o "$MDT" -o "$OST" -o "$LLITE" ]; then
                        STATE="running"
                        RETVAL=0
                fi
        else
                # check if this is a router
-               if [ -d /proc/sys/lnet ]; then
-                       ROUTER="`cat /proc/sys/lnet/routes | head -1 |
-                               grep -i -c \"Routing enabled\"`"
-                       if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
-                               STATE="running"
-                               RETVAL=0
-                       fi
+               if [[ "$(lctl get_param -n routes 2> /dev/null)" =~ "Routing enabled" ]]; then
+                       STATE="running"
+                       RETVAL=0
                fi
        fi
 
        # check for server disconnections
-       DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
-       if [ -n "$DISCON" ] ; then
-               STATE="disconnected"
-               RETVAL=0
+       VAR=$(lctl get_param -n *c.*.*server_uuid 2>&1)
+       if [ $? = 0 ] ; then
+               DISCON="$(echo "$VAR" | grep -v FULL)"
+               if [ -n "$DISCON" ] ; then
+                       STATE="disconnected"
+                       RETVAL=0
+               fi
        fi
 
        # check for servers in recovery
-       if [ -n "$MDT$OST" ] && grep -q RECOV $MDT $OST ; then
+       if [ -n "$MDT$OST" ] && echo $MDT $OST | grep -q RECOV ; then
                STATE="recovery"
                RETVAL=0
        fi
 
        # check for error in health_check
-       HEALTH="/proc/fs/lustre/health_check"
-       if [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH ; then
+       local health_check=$(lctl get_param -n health_check 2> /dev/null)
+       if [[ "$health_check" =~ "NOT HEALTHY" ]]; then
                STATE="unhealthy"
                RETVAL=1
        fi
 
        # check for LBUG
-       if [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH ; then
+       if [[ "$health_check" =~ "LBUG" ]]; then
                STATE="LBUG"
                RETVAL=152
        fi
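
All of these checks now go through "lctl get_param" instead of hard-coded /proc/fs/lustre and /proc/sys/lnet paths, so they keep working wherever the kernel actually exposes the parameters. The same queries are useful when looking at a node by hand; a few illustrative invocations taken from the parameters used above (output formats are typical, not guaranteed):

        # succeeds only when the Lustre modules are loaded
        lctl get_param version

        # server recovery state, one "status:" line per target
        lctl get_param -n mdt.*.recovery_status | grep '^status:'
        lctl get_param -n obdfilter.*.recovery_status | grep '^status:'

        # client import state; anything other than FULL is a disconnection
        lctl get_param -n *c.*.*server_uuid

        # overall node health; "NOT HEALTHY" or "LBUG" indicate trouble
        lctl get_param -n health_check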