. /etc/sysconfig/network
fi
-# Check that networking is up.
-[ "${NETWORKING}" = "no" ] && exit 0
+# Preconditions shared by start() and stop(). Exits the whole script with
+# status 0 when networking is disabled, when lconf/lctl are not executable,
+# or when the configuration file is missing. Also makes sure the status
+# directory exists.
+check_start_stop() {
+ # Check that networking is up.
+ [ "${NETWORKING}" = "no" ] && exit 0
-[ -x ${LCONF} -a -x ${LCTL} ] || exit 0
+ [ -x ${LCONF} -a -x ${LCTL} ] || exit 0
-[ -f ${LUSTRE_CONFIG_XML} ] || ( echo "unconfigured" && exit 0 )
+ # Use a brace group, not a subshell: "exit" inside ( ... ) only leaves
+ # the subshell, and the script would carry on without a configuration.
+ [ -f ${LUSTRE_CONFIG_XML} ] || { echo "unconfigured"; exit 0; }
-# Create /var/lustre directory
-# This is used by snmp agent for checking lustre services \
-# status online/offline/online pending/offline pending.
+ # Create /var/lustre directory
+ # This is used by snmp agent for checking lustre services
+ # status online/offline/online pending/offline pending.
-[ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
+ [ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
+}
-STATUS=${STATUS_DIR}/sysStatus
+# check_start_stop() has not run yet when this line is evaluated, so apply
+# the STATUS_DIR default here as well; with STATUS_DIR unset the status
+# file path would otherwise expand to /sysStatus.
+STATUS=${STATUS_DIR:=/var/lustre}/sysStatus
start() {
- echo -n "Starting $SERVICE: "
+ check_start_stop # precondition checks; may exit the script
+ echo -n "Starting $SERVICE: "
if [ $UID -ne 0 ]; then
echo "Lustre should be started as root"
RETVAL=1
return
fi
- ${LCONF} ${LCONF_START_ARGS}
- RETVAL=$?
- echo $SERVICE
- if [ $RETVAL -eq 0 ]; then
- touch $LOCK
- echo "online" >$STATUS
- else
- echo "online pending" >$STATUS
- fi
+ ${LCONF} ${LCONF_START_ARGS} # run lconf with the configured start arguments
+ RETVAL=$? # keep lconf's exit status; it selects the status word below
+ echo $SERVICE
+ if [ $RETVAL -eq 0 ]; then
+ touch $LOCK # lock file is created only when lconf succeeded
+ echo "online" >$STATUS
+ else
+ echo "online pending" >$STATUS # lconf failed: record start as pending
+ fi
}
stop() {
- echo -n "Shutting down $SERVICE: "
+ check_start_stop # precondition checks; may exit the script
+ echo -n "Shutting down $SERVICE: "
if [ $UID -ne 0 ]; then
echo "Lustre should be stopped as root"
RETVAL=1
return
fi
- ${LCONF} ${LCONF_STOP_ARGS}
- RETVAL=$?
- echo $SERVICE
- rm -f $LOCK
- if [ $RETVAL -eq 0 ]; then
- echo "offline" >$STATUS
- else
- echo "offline pending" >$STATUS
- fi
+ ${LCONF} ${LCONF_STOP_ARGS} # run lconf with the configured stop arguments
+ RETVAL=$? # keep lconf's exit status; it selects the status word below
+ echo $SERVICE
+ rm -f $LOCK # lock file is removed whether or not lconf succeeded
+ if [ $RETVAL -eq 0 ]; then
+ echo "offline" >$STATUS
+ else
+ echo "offline pending" >$STATUS # lconf failed: record stop as pending
+ fi
}
restart() {
}
status() {
- ${LCTL} dl 2>/dev/null | while read INDEX STAT MODULE NAME; do
- case $MODULE in
- ost|mds|osc|mdc)
- [ "`grep -v FULL /proc/fs/lustre/*c/*/*_server_uuid`" ] \
- && echo "recovery" || echo "running"
- return
- ;;
- esac
- done
- echo "stopped"
+ # Print a single state word on stdout. The checks below run in order and
+ # every check that matches overwrites STATE, so later checks win.
+ STATE="stopped"
+ egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
+
+ # check for any routes - on a portals router this is the only thing
+ [ "`cat /proc/sys/portals/routes 2> /dev/null`" ] && STATE="running"
+
+ # check for any configured devices (may indicate partial startup)
+ [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial"
+
+ # check for either a server or a client filesystem
+ MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
+ # OST recovery files live under obdfilter/, not mds/; listing mds/ twice
+ # would leave an OST-only server undetected.
+ OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
+ LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
+ [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running"
+
+ # check for server disconnections
+ DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
+ [ "$DISCON" ] && STATE="disconnected"
+
+ # check for servers in recovery
+ [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery"
+
+ [ "`dmesg | grep LBUG`" ] && STATE="LBUG"
+
+ echo $STATE
}
# See how we were called.