X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fscripts%2Flustre;h=73c5b22353047dca664990b28f23886d29804944;hb=dd62978b709aebf3dda536f2230f1b79c5361d9c;hp=b62dc4c92b0a96e7daaa28b8c8157dad8cdf156e;hpb=048001a78ae207af695951910661d2edad825b4e;p=fs%2Flustre-release.git diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre index b62dc4c..73c5b22 100755 --- a/lustre/scripts/lustre +++ b/lustre/scripts/lustre @@ -2,52 +2,161 @@ # # lustre This shell script takes care of starting and stopping Lustre # -# chkconfig: 345 99 1 -# description: Lustre Lite network File System. \ +# chkconfig: - 99 1 +# description: Lustre Lite network File System. # This starts both Lustre client and server functions. # processname: lconf # config: /etc/lustre/config.xml # pidfile: /var/run/lustre.pid +### BEGIN INIT INFO +# Provides: lustre +# Required-Start: $network +sshd +# Required-Stop: $network +# Should-Start: +# Should-Stop: +# Default-Start: +# Default-Stop: 0 1 2 3 4 5 6 +# Short-Description: Lustre Lite network File System. +# Description: This starts both Lustre client and server functions. +### END INIT INFO -SERVICE=lustre -LOCK=/var/lock/subsys/$SERVICE + +SERVICE=${0##*/} : ${LUSTRE_CFG:=/etc/lustre/lustre.cfg} [ -f ${LUSTRE_CFG} ] && . ${LUSTRE_CFG} +[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre : ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml} -: ${LCONF:=/usr/sbin/lconf} -: ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"} -: ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"} +: ${LCONF:=lconf} +: ${LCTL:=lctl} +# Some distros use modprobe.conf.local +if [ -f /etc/modprobe.conf.local ]; then + : ${MODPROBE_CONF:=/etc/modprobe.conf.local} +else + : ${MODPROBE_CONF:=/etc/modprobe.conf} +fi +# Be sure the proper directories are in PATH. +export PATH="/sbin:$PATH" + +case "$SERVICE" in + [SK][[:digit:]][[:digit:]]lustre | lustre) + SERVICE="lustre" + : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"} + : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"} + ;; + *) + : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"} + : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"} + ;; +esac +LOCK=/var/lock/subsys/$SERVICE # Source function library. if [ -f /etc/init.d/functions ] ; then - . /etc/init.d/functions + . /etc/init.d/functions fi # Source networking configuration. if [ -f /etc/sysconfig/network ] ; then - . /etc/sysconfig/network + . /etc/sysconfig/network fi -# Check that networking is up. -[ "${NETWORKING}" = "no" ] && exit 0 +check_start_stop() { + # Exit codes now LSB compliant + # Check that networking is up. - exit 'not running' + [ "${NETWORKING}" = "no" ] && exit 7 + + # exit 'not installed' + [ -x ${LCONF} -a -x ${LCTL} ] || exit 5 + + if [ ${LUSTRE_CONFIG_XML:0:1} = "/" ] ; then + if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then + echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping." + # exit 'not configured' + exit 6 + fi + fi -[ -x ${LCONF} -a -f ${LUSTRE_CONFIG_XML} ] || exit 0 + # Create /var/lustre directory + # This is used by snmp agent for checking lustre services + # status online/offline/online pending/offline pending. + + [ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR + STATUS=${STATUS_DIR}/sysStatus +} start() { - echo -n "Starting $SERVICE: " - ${LCONF} ${LCONF_START_ARGS} - RETVAL=$? - echo $SERVICE - [ $RETVAL -eq 0 ] && touch $LOCK + if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then + if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then + cat >&2 <$STATUS + else + echo "online pending" >$STATUS + fi } stop() { - echo -n "Shutting down $SERVICE: " - ${LCONF} ${LCONF_STOP_ARGS} + check_start_stop + echo -n "Shutting down $SERVICE: " + if [ $UID -ne 0 ]; then + echo "Lustre should be stopped as root" + RETVAL=4 # insufficent privileges + return + fi + # Cat the modprobe file and place all lines that follow a trailing backslash on the same line ++ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"` + if [[ ! -z ${ROUTER} ]]; then + MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet` + if [[ ! -z ${MODULE_LOADED} ]]; then + ${LCTL} network unconfigure + fi + ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 + # do it again, in case we tried to unload ksocklnd too early + ${LCTL} modules | awk '{ print $2 }' | xargs rmmod + + else + ${LCONF} ${LCONF_STOP_ARGS} + fi + RETVAL=$? echo $SERVICE - rm -f $LOCK + rm -f $LOCK + if [ $RETVAL -eq 0 ]; then + echo "offline" >$STATUS + else + echo "offline pending" >$STATUS + fi } restart() { @@ -55,6 +164,63 @@ restart() { start } +status() { + STATE="stopped" + # LSB compliance - return 3 if service is not running + # Lustre-specific returns + # 150 - partial startup + # 151 - health_check unhealthy + # 152 - LBUG + RETVAL=3 + egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded" + + # check for any routes - on a portals router this is the only thing + [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0 + + # check for any configured devices (may indicate partial startup) + if [ -d /proc/fs/lustre ]; then + [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150 + + # check for either a server or a client filesystem + MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`" + OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`" + LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`" + [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0 + else + # check if this is a router + if [ -d /proc/sys/lnet ]; then + ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`" + if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then + STATE="running" + RETVAL=0 + fi + fi + fi + + # check for server disconnections + DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`" + [ "$DISCON" ] && STATE="disconnected" && RETVAL=0 + + # check for servers in recovery + [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery" && RETVAL=0 + + # check for error in health_check + HEALTH="/proc/fs/lustre/health_check" + [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=151 + + # check for LBUG + [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152 + + # If Lustre is up , check if the service really exists + # Skip this is we are not checking a specific service + if [ $RETVAL -eq 0 ] && [ $SERVICE != 'lustre' ]; then + DUMMY=$( $LCTL dl | grep "$SERVICE") + [ $? -ne 0 ] && STATE="not_found" && RETVAL=3 + fi + + echo $STATE +} + # See how we were called. case "$1" in start)