#!/bin/sh
#
# lustre        This shell script takes care of starting and stopping Lustre
#
# chkconfig: - 99 1
# description: Lustre Lite network File System.
#              This starts both Lustre client and server functions.
# processname: lconf
# config: /etc/lustre/config.xml
# pidfile: /var/run/lustre.pid
### BEGIN INIT INFO
# Provides: lustre
# Required-Start: $network +sshd
# Required-Stop: $network
# Should-Start:
# Should-Stop:
# Default-Start:
# Default-Stop: 0 1 2 3 4 5 6
# Short-Description: Lustre Lite network File System.
# Description: This starts both Lustre client and server functions.
### END INIT INFO
#
# Usage: <service> {start|stop|restart|status}
#
# The script may be invoked as "lustre" (or an S??lustre / K??lustre rc link),
# in which case it acts on the whole configuration, or through a link with any
# other name, in which case that name is used as the lconf --group/--select
# service name.
#
# Overridable settings (from ${LUSTRE_CFG} or /etc/sysconfig/lustre):
#   LUSTRE_CFG, LUSTRE_CONFIG_XML, LCONF, LCTL, MODPROBE_CONF,
#   LCONF_START_ARGS, LCONF_STOP_ARGS, STATUS_DIR

SERVICE=${0##*/}

: "${LUSTRE_CFG:=/etc/lustre/lustre.cfg}"
[ -f "${LUSTRE_CFG}" ] && . "${LUSTRE_CFG}"
[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre

: "${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}"
: "${LCONF:=lconf}"
: "${LCTL:=lctl}"

# Some distros use modprobe.conf.local
if [ -f /etc/modprobe.conf.local ]; then
  : "${MODPROBE_CONF:=/etc/modprobe.conf.local}"
else
  : "${MODPROBE_CONF:=/etc/modprobe.conf}"
fi

# Be sure the proper directories are in PATH.
export PATH="/sbin:$PATH"

# HOSTNAME is set automatically by bash but not by every /bin/sh; fall back
# to hostname(1) so the --select arguments below are never built empty.
: "${HOSTNAME:=$(hostname)}"

case "$SERVICE" in
  [SK][[:digit:]][[:digit:]]lustre | lustre)
    SERVICE="lustre"
    : "${LCONF_START_ARGS:=${LUSTRE_CONFIG_XML}}"
    : "${LCONF_STOP_ARGS:=--force --cleanup ${LUSTRE_CONFIG_XML}}"
    ;;
  *)
    : "${LCONF_START_ARGS:=--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}}"
    : "${LCONF_STOP_ARGS:=--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}}"
    ;;
esac
LOCK=/var/lock/subsys/$SERVICE

# Source function library.
if [ -f /etc/init.d/functions ]; then
  . /etc/init.d/functions
fi

# Source networking configuration.
if [ -f /etc/sysconfig/network ]; then
  . /etc/sysconfig/network
fi

#######################################
# Verify the preconditions shared by start and stop, and prepare the status
# file location. Exits the script directly (LSB exit codes) on failure.
# Globals:   NETWORKING, LCONF, LCTL, LUSTRE_CONFIG_XML (read)
#            STATUS_DIR, STATUS (written)
# Exits:     7 networking disabled, 5 tools not found, 6 config file missing
#######################################
check_start_stop() {
  # Exit codes now LSB compliant
  # Check that networking is up. - exit 'not running'
  [ "${NETWORKING}" = "no" ] && exit 7

  # exit 'not installed'
  # command -v resolves bare names through PATH as well as explicit paths;
  # a plain "-x lconf" test would only look in the current directory.
  command -v "${LCONF}" >/dev/null 2>&1 || exit 5
  command -v "${LCTL}" >/dev/null 2>&1 || exit 5

  # Only an absolute path is checked for existence here.
  case "${LUSTRE_CONFIG_XML}" in
    /*)
      if [ ! -f "${LUSTRE_CONFIG_XML}" ]; then
        echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping."
        # exit 'not configured'
        exit 6
      fi
      ;;
  esac

  # Create /var/lustre directory
  # This is used by snmp agent for checking lustre services
  # status online/offline/online pending/offline pending.
  [ -d "${STATUS_DIR:=/var/lustre}" ] || mkdir -p "${STATUS_DIR}"
  STATUS=${STATUS_DIR}/sysStatus
}

#######################################
# Print the lnet line(s) of ${MODPROBE_CONF} that enable forwarding, after
# joining lines that end in a backslash continuation. Prints nothing when
# the node is not configured as a router or the file is unreadable.
# Globals:   MODPROBE_CONF (read)
# Outputs:   matching option lines on stdout
#######################################
lnet_router_options() {
  [ -r "${MODPROBE_CONF}" ] || return 0
  # Slurp the whole file, then delete "backslash, optional blanks, newline"
  # so every continued option line ends up on a single line.
  sed -e ':a' -e 'N' -e '$!ba' -e 's#\\[[:space:]]*\n##g' "${MODPROBE_CONF}" |
    grep lnet |
    grep 'forwarding="enabled"'
}

#######################################
# Start Lustre (or only lnet routing on a router node).
# Globals:   SERVICE, LCTL, LCONF, LCONF_START_ARGS, LOCK, STATUS (read)
#            ROUTER, RETVAL (written)
#
# NOTE(review): in the copy of this file that was reviewed, the middle of
# this function (from the clumanager warning text up to the final status
# write) was missing. Everything between the two "reconstructed" markers was
# rebuilt to mirror stop(); confirm the warning wording, the RETVAL used for
# the clumanager case and the router start commands against upstream.
#######################################
start() {
  if [ -x "/usr/sbin/clustat" ] && [ "${SERVICE}" = "lustre" ]; then
    if [ ! -f "/etc/lustre/start-despite-clumanager" ]; then
      # --- reconstructed: begin ---
      {
        printf '%s\n' "This script was run directly, which can be dangerous if you are using"
        printf '%s\n' "clumanager to manage Lustre services."
        printf '\n'
        printf '%s\n' "If you are not using clumanager for Lustre services, run the following"
        printf '%s\n' "command to have this script start Lustre instead:"
        printf '\n'
        printf '%s\n' "touch /etc/lustre/start-despite-clumanager"
      } >&2
      RETVAL=6 # program not configured
      return
    fi
  fi

  check_start_stop
  printf '%s' "Starting $SERVICE: "
  if [ "$(id -u)" -ne 0 ]; then
    echo "Lustre should be started as root"
    RETVAL=4 # insufficient privileges
    return
  fi

  ROUTER=$(lnet_router_options)
  if [ -n "${ROUTER}" ]; then
    modprobe lnet
    "${LCTL}" network configure
  else
    # LCONF_START_ARGS is a whitespace-separated argument list by design.
    # shellcheck disable=SC2086
    "${LCONF}" ${LCONF_START_ARGS}
  fi
  RETVAL=$?
  echo "$SERVICE"
  if [ "$RETVAL" -eq 0 ]; then
    touch "$LOCK"
    echo "online" >"$STATUS"
    # --- reconstructed: end ---
  else
    echo "online pending" >"$STATUS"
  fi
}

#######################################
# Stop Lustre (or unconfigure lnet and unload modules on a router node).
# Globals:   SERVICE, LCTL, LCONF, LCONF_STOP_ARGS, LOCK, STATUS (read)
#            ROUTER, RETVAL (written)
# RETVAL is the status of lconf, or of the final rmmod pass on a router.
#######################################
stop() {
  check_start_stop
  printf '%s' "Shutting down $SERVICE: "
  if [ "$(id -u)" -ne 0 ]; then
    echo "Lustre should be stopped as root"
    RETVAL=4 # insufficient privileges
    return
  fi

  ROUTER=$(lnet_router_options)
  if [ -n "${ROUTER}" ]; then
    # Only unconfigure the network if an lnet module is actually loaded.
    if lsmod | awk '{ print $1 }' | grep -q lnet; then
      "${LCTL}" network unconfigure
    fi
    "${LCTL}" modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
    # do it again, in case we tried to unload ksocklnd too early
    "${LCTL}" modules | awk '{ print $2 }' | xargs rmmod
  else
    # LCONF_STOP_ARGS is a whitespace-separated argument list by design.
    # shellcheck disable=SC2086
    "${LCONF}" ${LCONF_STOP_ARGS}
  fi
  RETVAL=$?
  echo "$SERVICE"
  rm -f "$LOCK"
  if [ "$RETVAL" -eq 0 ]; then
    echo "offline" >"$STATUS"
  else
    echo "offline pending" >"$STATUS"
  fi
}

# Stop, then start. check_start_stop may exit the script during stop.
restart() {
  stop
  start
}

#######################################
# Report the service state on stdout and set RETVAL accordingly.
# Later checks deliberately override earlier ones, so statement order matters.
# Globals:   SERVICE, LCTL (read); STATE, RETVAL, MDS, OST, LLITE, ROUTER,
#            DISCON, HEALTH (written)
# Outputs:   one of stopped, loaded, partial, running, disconnected,
#            recovery, unhealthy, LBUG, not_found
#######################################
status() {
  STATE="stopped"
  # LSB compliance - return 3 if service is not running
  # Lustre-specific returns
  # 150 - partial startup
  # 151 - health_check unhealthy
  # 152 - LBUG
  RETVAL=3
  # Start from a clean slate so values inherited from the environment
  # cannot influence the checks below.
  MDS=""
  OST=""
  LLITE=""

  grep -E -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"

  # check for any routes - on a portals router this is the only thing
  if [ -n "$(cat /proc/sys/lnet/routes 2>/dev/null)" ]; then
    STATE="running"
    RETVAL=0
  fi

  # check for any configured devices (may indicate partial startup)
  if [ -d /proc/fs/lustre ]; then
    if [ -n "$(cat /proc/fs/lustre/devices 2>/dev/null)" ]; then
      STATE="partial"
      RETVAL=150
    fi

    # check for either a server or a client filesystem
    MDS="$(ls /proc/fs/lustre/mds/*/recovery_status 2>/dev/null)"
    OST="$(ls /proc/fs/lustre/obdfilter/*/recovery_status 2>/dev/null)"
    LLITE="$(ls /proc/fs/lustre/llite/fs* 2>/dev/null)"
    if [ -n "$MDS$OST$LLITE" ]; then
      STATE="running"
      RETVAL=0
    fi
  else
    # check if this is a router
    if [ -d /proc/sys/lnet ]; then
      # grep -c prints the number of matching lines (0 when there are none).
      ROUTER="$(head -n 1 /proc/sys/lnet/routes | grep -i -c "Routing enabled")"
      if [ "${ROUTER:-0}" -ge 1 ]; then
        STATE="running"
        RETVAL=0
      fi
    fi
  fi

  # check for server disconnections
  DISCON="$(grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2>/dev/null)"
  if [ -n "$DISCON" ]; then
    STATE="disconnected"
    RETVAL=0
  fi

  # check for servers in recovery
  # MDS and OST hold newline-separated file lists; they must be word-split.
  # shellcheck disable=SC2086
  if [ -n "$MDS$OST" ] && grep -q RECOV $MDS $OST; then
    STATE="recovery"
    RETVAL=0
  fi

  # check for error in health_check
  HEALTH="/proc/fs/lustre/health_check"
  if [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" "$HEALTH"; then
    STATE="unhealthy"
    RETVAL=151
  fi

  # check for LBUG
  if [ -f "$HEALTH" ] && grep -q "LBUG" "$HEALTH"; then
    STATE="LBUG"
    RETVAL=152
  fi

  # If Lustre is up, check if the service really exists
  # Skip this if we are not checking a specific service
  if [ "$RETVAL" -eq 0 ] && [ "$SERVICE" != 'lustre' ]; then
    if ! "${LCTL}" dl | grep -q -- "$SERVICE"; then
      STATE="not_found"
      RETVAL=3
    fi
  fi

  echo "$STATE"
}

# See how we were called.
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    restart
    ;;
  status)
    status "$SERVICE"
    ;;
  *)
    echo "Usage: $SERVICE {start|stop|restart|status}"
    exit 1
    ;;
esac

exit $RETVAL