#
# lustre This shell script takes care of starting and stopping Lustre
#
-# chkconfig: 345 99 1
-# description: Lustre Lite network File System. \
+# chkconfig: - 99 1
+# description: Lustre Lite network File System.
# This starts both Lustre client and server functions.
# processname: lconf
# config: /etc/lustre/config.xml
# pidfile: /var/run/lustre.pid
+### BEGIN INIT INFO
+# Provides: lustre
+# Required-Start: $network +sshd
+# Required-Stop: $network
+# Should-Start:
+# Should-Stop:
+# Default-Start:
+# Default-Stop: 0 1 2 3 4 5 6
+# Short-Description: Lustre Lite network File System.
+# Description: This starts both Lustre client and server functions.
+### END INIT INFO
-SERVICE=lustre
-LOCK=/var/lock/subsys/$SERVICE
+
+# The service name is the basename of the path this script was invoked as.
+SERVICE=${0##*/}
: ${LUSTRE_CFG:=/etc/lustre/lustre.cfg}
[ -f ${LUSTRE_CFG} ] && . ${LUSTRE_CFG}
+[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
: ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}
-: ${LCONF:=/usr/sbin/lconf}
-: ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
-: ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
+# Defaults are bare command names; values set by the files sourced above win.
+: ${LCONF:=lconf}
+: ${LCTL:=lctl}
+# Some distros use modprobe.conf.local
+if [ -f /etc/modprobe.conf.local ]; then
+ : ${MODPROBE_CONF:=/etc/modprobe.conf.local}
+else
+ : ${MODPROBE_CONF:=/etc/modprobe.conf}
+fi
+# Be sure the proper directories are in PATH.
+export PATH="/sbin:$PATH"
+
+# An rc-style name (S or K, two digits, then "lustre") or plain "lustre" uses
+# the whole configuration file; any other name is handed to lconf as the
+# --group / --select service name, and stop uses --failover instead of --force.
+case "$SERVICE" in
+ [SK][[:digit:]][[:digit:]]lustre | lustre)
+ SERVICE="lustre"
+ : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
+ : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
+ ;;
+ *)
+ : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"}
+ : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"}
+ ;;
+esac
+# The lock file is named after the (possibly normalized) service name.
+LOCK=/var/lock/subsys/$SERVICE
# Source function library.
if [ -f /etc/init.d/functions ] ; then
- . /etc/init.d/functions
+ . /etc/init.d/functions
fi
# Source networking configuration.
if [ -f /etc/sysconfig/network ] ; then
- . /etc/sysconfig/network
+ . /etc/sysconfig/network
fi
-# Check that networking is up.
-[ "${NETWORKING}" = "no" ] && exit 0
+check_start_stop() {
+ # Common pre-flight checks for start and stop. Exits the whole script
+ # (not just this function) with an LSB status code when a check fails;
+ # on success it leaves STATUS_DIR and STATUS set for the caller.
+ # Exit codes now LSB compliant
+ # Check that networking is up. - exit 'not running'
+ [ "${NETWORKING}" = "no" ] && exit 7
+
+ # exit 'not installed'
+ # LCONF and LCTL default to bare command names, so they must be resolved
+ # through PATH; a plain [ -x lconf ] would only look in the current
+ # directory. command -v also accepts an explicit path to an executable.
+ command -v "${LCONF}" >/dev/null 2>&1 || exit 5
+ command -v "${LCTL}" >/dev/null 2>&1 || exit 5
+
+ # Only an absolute configuration path is checked for existence here.
+ if [ "${LUSTRE_CONFIG_XML:0:1}" = "/" ] ; then
+ if [ ! -f "${LUSTRE_CONFIG_XML}" ] ; then
+ echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping."
+ # exit 'not configured'
+ exit 6
+ fi
+ fi
-[ -x ${LCONF} -a -f ${LUSTRE_CONFIG_XML} ] || exit 0
+ # Create /var/lustre directory
+ # This is used by snmp agent for checking lustre services
+ # status online/offline/online pending/offline pending.
+
+ [ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
+ STATUS=${STATUS_DIR}/sysStatus
+}
start() {
- echo -n "Starting $SERVICE: "
- ${LCONF} ${LCONF_START_ARGS}
- RETVAL=$?
- echo $SERVICE
- [ $RETVAL -eq 0 ] && touch $LOCK
+ # Start Lustre and record the outcome in RETVAL, $LOCK and $STATUS.
+ # The generic "lustre" service refuses to start when /usr/sbin/clustat is
+ # installed, unless /etc/lustre/start-despite-clumanager exists.
+ if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then
+ if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then
+ cat >&2 <<EOF
+This script was run directly, which can be dangerous if you are using
+clumanager to manage Lustre services.
+
+If you are not using clumanager for Lustre services, run the following
+command to have this script start Lustre instead:
+
+touch /etc/lustre/start-despite-clumanager
+EOF
+ RETVAL=6 # program not configured
+ return
+ fi
+ fi
+ # May exit the script; on return STATUS points at the status file.
+ check_start_stop
+ echo -n "Starting $SERVICE: "
+ if [ $UID -ne 0 ]; then
+ echo "Lustre should be started as root"
+ RETVAL=4 # insufficient privileges
+ return
+ fi
+ # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
+ # ROUTER ends up non-empty when an lnet line contains forwarding="enabled".
+ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"`
+ if [[ ! -z ${ROUTER} ]]; then
+ # Router: only load lnet and configure the network through lctl.
+ modprobe lnet
+ ${LCTL} network configure
+ else
+ ${LCONF} ${LCONF_START_ARGS}
+ fi
+ # Exit status of the last command run in whichever branch was taken.
+ RETVAL=$?
+ echo $SERVICE
+ if [ $RETVAL -eq 0 ]; then
+ touch $LOCK
+ echo "online" >$STATUS
+ else
+ echo "online pending" >$STATUS
+ fi
}
stop() {
- echo -n "Shutting down $SERVICE: "
- ${LCONF} ${LCONF_STOP_ARGS}
+ # Stop Lustre and record the outcome in RETVAL and $STATUS; the lock
+ # file is removed whether or not the shutdown succeeded.
+ # check_start_stop may exit the script; on return STATUS is set.
+ check_start_stop
+ echo -n "Shutting down $SERVICE: "
+ if [ $UID -ne 0 ]; then
+ echo "Lustre should be stopped as root"
+ RETVAL=4 # insufficient privileges
+ return
+ fi
+ # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
+ # ROUTER ends up non-empty when an lnet line contains forwarding="enabled".
+ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"`
+ if [[ ! -z ${ROUTER} ]]; then
+ # Router: unconfigure the network only if lnet is actually loaded.
+ MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet`
+ if [[ ! -z ${MODULE_LOADED} ]]; then
+ ${LCTL} network unconfigure
+ fi
+ ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
+ # do it again, in case we tried to unload ksocklnd too early
+ ${LCTL} modules | awk '{ print $2 }' | xargs rmmod
+
+ else
+ ${LCONF} ${LCONF_STOP_ARGS}
+ fi
+ # Exit status of the last command run in whichever branch was taken
+ # (the second rmmod pipeline on a router, lconf otherwise).
+ RETVAL=$?
 echo $SERVICE
- rm -f $LOCK
+ rm -f $LOCK
+ if [ $RETVAL -eq 0 ]; then
+ echo "offline" >$STATUS
+ else
+ echo "offline pending" >$STATUS
+ fi
}
restart() {
start
}
+status() {
+ # Print the detected Lustre state on stdout and leave the matching code
+ # in RETVAL. Checks run in order and a later match overrides an earlier one.
+ STATE="stopped"
+ # LSB compliance - return 3 if service is not running
+ # Lustre-specific returns
+ # 150 - partial startup
+ # 151 - health_check unhealthy
+ # 152 - LBUG
+ RETVAL=3
+ egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
+
+ # check for any routes - on a portals router this is the only thing
+ [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
+
+ # check for any configured devices (may indicate partial startup)
+ if [ -d /proc/fs/lustre ]; then
+ [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
+
+ # check for either a server or a client filesystem
+ MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
+ OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
+ LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
+ [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
+ else
+ # check if this is a router
+ if [ -d /proc/sys/lnet ]; then
+ # ROUTER is 1 when the first line of the routes file says "Routing enabled", else 0
+ ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`"
+ if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
+ STATE="running"
+ RETVAL=0
+ fi
+ fi
+ fi
+
+ # check for server disconnections
+ DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
+ [ "$DISCON" ] && STATE="disconnected" && RETVAL=0
+
+ # check for servers in recovery
+ [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery" && RETVAL=0
+
+ # check for error in health_check
+ HEALTH="/proc/fs/lustre/health_check"
+ [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=151
+
+ # check for LBUG
+ [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152
+
+ # If Lustre is up, check if the service really exists
+ # Skip this if we are not checking a specific service
+ if [ $RETVAL -eq 0 ] && [ $SERVICE != 'lustre' ]; then
+ # Only grep's exit status is used; DUMMY just swallows the output.
+ DUMMY=$( $LCTL dl | grep "$SERVICE")
+ [ $? -ne 0 ] && STATE="not_found" && RETVAL=3
+ fi
+
+ echo $STATE
+}
+
# See how we were called.
case "$1" in
start)