Whamcloud - gitweb
LU-1347 lnet: makes EXPORT_SYMBOL follows function body
[fs/lustre-release.git] / lustre / scripts / lustre
index 95c1d06..73c5b22 100755 (executable)
 #
 # lustre   This shell script takes care of starting and stopping Lustre
 #
-# chkconfig: 345 99 1
-# description: Lustre Lite network File System.                              \
+# chkconfig: - 99 1
+# description: Lustre Lite network File System.
 #              This starts both Lustre client and server functions.
 # processname: lconf
 # config: /etc/lustre/config.xml
 # pidfile: /var/run/lustre.pid
+### BEGIN INIT INFO
+# Provides: lustre
+# Required-Start: $network +sshd
+# Required-Stop: $network
+# Should-Start:
+# Should-Stop:
+# Default-Start: 
+# Default-Stop: 0 1 2 3 4 5 6
+# Short-Description: Lustre Lite network File System.
+# Description: This starts both Lustre client and server functions.
+### END INIT INFO
 
-SERVICE=lustre
-LOCK=/var/lock/subsys/$SERVICE
+
+SERVICE=${0##*/}
 
 : ${LUSTRE_CFG:=/etc/lustre/lustre.cfg}
 [ -f ${LUSTRE_CFG} ] && . ${LUSTRE_CFG}
+[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
 
 : ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}
-: ${LCONF:=/usr/sbin/lconf}
-: ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
-: ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
+: ${LCONF:=lconf}
+: ${LCTL:=lctl}
+# Some distros use modprobe.conf.local
+if [ -f /etc/modprobe.conf.local ]; then
+   : ${MODPROBE_CONF:=/etc/modprobe.conf.local}
+else
+   : ${MODPROBE_CONF:=/etc/modprobe.conf}
+fi
+# Be sure the proper directories are in PATH. 
+export PATH="/sbin:$PATH"
+
+case "$SERVICE" in
+    [SK][[:digit:]][[:digit:]]lustre | lustre)
+        SERVICE="lustre"
+       : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
+       : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
+       ;;
+    *)
+       : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"}
+       : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"}
+       ;;
+esac
+LOCK=/var/lock/subsys/$SERVICE
 
 # Source function library.
 if [ -f /etc/init.d/functions ] ; then
-   . /etc/init.d/functions
+       . /etc/init.d/functions
 fi
 
 # Source networking configuration.
 if [ -f /etc/sysconfig/network ] ; then
-   . /etc/sysconfig/network
+       . /etc/sysconfig/network
 fi
 
-# Check that networking is up.
-[ "${NETWORKING}" = "no" ] && exit 0
+check_start_stop() {
+       # Exit codes now LSB compliant
+       # Check that networking is up. - exit 'not running'
+       [ "${NETWORKING}" = "no" ] && exit 7 
+
+       # exit 'not installed' 
+       [ -x ${LCONF} -a -x ${LCTL} ] || exit 5
 
-[ -x ${LCONF} -a -f ${LUSTRE_CONFIG_XML} ] || exit 0
+       if [ ${LUSTRE_CONFIG_XML:0:1} = "/" ] ; then
+                       if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then
+                       echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping."
+                       # exit 'not configured'
+                       exit 6
+               fi
+       fi
 
-# Create /var/lustre directory 
-# This is used by snmp agent for checking lustre services       \
-#    status online/offline/online pending/offline pending.
+       # Create /var/lustre directory 
+       # This is used by snmp agent for checking lustre services
+       #    status online/offline/online pending/offline pending.
 
-[ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
-STATUS=${STATUS_DIR}/sysStatus
+       [ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
+       STATUS=${STATUS_DIR}/sysStatus
+}
 
 start() {
-        echo -n "Starting $SERVICE: "
-        ${LCONF} ${LCONF_START_ARGS}
-        RETVAL=$?
-        echo $SERVICE
-        if [ $RETVAL -eq 0 ]; then
-               touch $LOCK
-              echo "online" >$STATUS
-        else
-              echo "online pending" >$STATUS
-        fi
+       if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then
+               if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then
+               cat >&2 <<EOF
+This script was run directly, which can be dangerous if you are using
+clumanager to manage Lustre services.
+
+If you are not using clumanager for Lustre services, run the following
+command to have this script start Lustre instead:
+
+touch /etc/lustre/start-despite-clumanager
+EOF
+               RETVAL=6  # program not configured
+               return
+           fi
+       fi
+       check_start_stop
+       echo -n "Starting $SERVICE: "
+       if [ $UID -ne 0 ]; then
+               echo "Lustre should be started as root"
+               RETVAL=4 # insufficent privileges
+               return
+       fi
+       # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
+       ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"`
+       if [[ ! -z ${ROUTER} ]]; then
+               modprobe lnet
+               ${LCTL} network configure
+       else
+               ${LCONF} ${LCONF_START_ARGS}
+       fi
+       RETVAL=$?
+       echo $SERVICE
+       if [ $RETVAL -eq 0 ]; then
+               touch $LOCK
+               echo "online" >$STATUS
+       else
+               echo "online pending" >$STATUS
+       fi
 }
 
 stop() {
-        echo -n "Shutting down $SERVICE: "
-        ${LCONF} ${LCONF_STOP_ARGS}
-        RETVAL=$?
-        echo $SERVICE
-        rm -f $LOCK 
-        if [ $RETVAL -eq 0 ]; then
-              echo "offline" >$STATUS
-        else
-              echo "offline pending" >$STATUS
-        fi
+       check_start_stop
+       echo -n "Shutting down $SERVICE: "
+       if [ $UID -ne 0 ]; then
+               echo "Lustre should be stopped as root"
+               RETVAL=4 # insufficent privileges
+               return
+       fi
+       # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
++      ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"`
+       if [[ ! -z ${ROUTER} ]]; then
+               MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet`
+               if [[ ! -z ${MODULE_LOADED} ]]; then
+                       ${LCTL} network unconfigure
+               fi
+               ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
+               # do it again, in case we tried to unload ksocklnd too early
+               ${LCTL} modules | awk '{ print $2 }' | xargs rmmod
+
+       else
+               ${LCONF} ${LCONF_STOP_ARGS}
+       fi
+       RETVAL=$?
+       echo $SERVICE
+       rm -f $LOCK 
+       if [ $RETVAL -eq 0 ]; then
+               echo "offline" >$STATUS
+       else
+               echo "offline pending" >$STATUS
+       fi
 }
 
 restart() {
@@ -73,6 +164,63 @@ restart() {
        start
 }
 
+status() {
+       STATE="stopped"
+       # LSB compliance - return 3 if service is not running
+       # Lustre-specific returns
+       # 150 - partial startup
+       # 151 - health_check unhealthy
+       # 152 - LBUG
+       RETVAL=3
+       egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
+
+       # check for any routes - on a portals router this is the only thing
+       [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
+       
+       # check for any configured devices (may indicate partial startup)
+       if [ -d /proc/fs/lustre ]; then
+               [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
+
+               # check for either a server or a client filesystem
+               MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
+               OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
+               LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
+               [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
+       else
+               # check if this is a router
+               if [ -d /proc/sys/lnet ]; then
+                       ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`"
+                       if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
+                               STATE="running"
+                               RETVAL=0
+                       fi
+               fi
+       fi
+
+       # check for server disconnections 
+       DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
+       [ "$DISCON" ] && STATE="disconnected" && RETVAL=0
+
+       # check for servers in recovery
+       [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery" && RETVAL=0
+
+       # check for error in health_check
+       HEALTH="/proc/fs/lustre/health_check"
+       [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=151
+
+       # check for LBUG
+       [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152
+
+       # If Lustre is up , check if the service really exists
+        # Skip this is we are not checking a specific service
+       if [ $RETVAL -eq 0 ] && [ $SERVICE != 'lustre' ]; then
+               DUMMY=$( $LCTL dl | grep "$SERVICE")
+               [ $? -ne 0 ] && STATE="not_found" && RETVAL=3
+       fi
+
+       echo $STATE
+}
+
 # See how we were called.
 case "$1" in
   start)