3 # lustre This shell script takes care of starting and stopping Lustre
6 # description: Lustre Lite network File System.
7 # This starts both Lustre client and server functions.
9 # config: /etc/lustre/config.xml
10 # pidfile: /var/run/lustre.pid
13 # Required-Start: $network +sshd
14 # Required-Stop: $network
18 # Default-Stop: 0 1 2 3 4 5 6
19 # Short-Description: Lustre Lite network File System.
20 # Description: This starts both Lustre client and server functions.
26 : ${LUSTRE_CFG:=/etc/lustre/lustre.cfg}
27 [ -f ${LUSTRE_CFG} ] && . ${LUSTRE_CFG}
28 [ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
30 : ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}
33 # Some distros use modprobe.conf.local
34 if [ -f /etc/modprobe.conf.local ]; then
35 : ${MODPROBE_CONF:=/etc/modprobe.conf.local}
37 : ${MODPROBE_CONF:=/etc/modprobe.conf}
39 # Be sure the proper directories are in PATH.
40 export PATH="/sbin:$PATH"
43 [SK][[:digit:]][[:digit:]]lustre | lustre)
45 : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
46 : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
49 : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"}
50 : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"}
53 LOCK=/var/lock/subsys/$SERVICE
55 # Source function library.
56 if [ -f /etc/init.d/functions ] ; then
57 . /etc/init.d/functions
60 # Source networking configuration.
61 if [ -f /etc/sysconfig/network ] ; then
62 . /etc/sysconfig/network
66 # Exit codes are now LSB compliant.
67 # Check that networking is up; if NETWORKING is "no", exit 7 ('not running').
68 [ "${NETWORKING}" = "no" ] && exit 7
70 # Exit 5 ('not installed') unless both ${LCONF} and ${LCTL} are executable.
71 [ -x ${LCONF} -a -x ${LCTL} ] || exit 5
73 if [ ${LUSTRE_CONFIG_XML:0:1} = "/" ] ; then
74 if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then
75 echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping."
76 # exit 'not configured'
81 # Create the status directory (default /var/lustre) if it does not exist.
82 # It is used by the SNMP agent to check the status of Lustre services:
83 # online / offline / online pending / offline pending.
85 [ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
86 STATUS=${STATUS_DIR}/sysStatus
90 if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then
91 if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then
93 This script was run directly, which can be dangerous if you are using
94 clumanager to manage Lustre services.
96 If you are not using clumanager for Lustre services, run the following
97 command to have this script start Lustre instead:
99 touch /etc/lustre/start-despite-clumanager
101 RETVAL=6 # program not configured
106 echo -n "Starting $SERVICE: "
107 if [ $UID -ne 0 ]; then
108 echo "Lustre should be started as root"
109 RETVAL=4 # insufficient privileges
112 # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
113 ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"`
114 if [[ ! -z ${ROUTER} ]]; then
116 ${LCTL} network configure
118 ${LCONF} ${LCONF_START_ARGS}
122 if [ $RETVAL -eq 0 ]; then
124 echo "online" >$STATUS
126 echo "online pending" >$STATUS
132 echo -n "Shutting down $SERVICE: "
133 if [ $UID -ne 0 ]; then
134 echo "Lustre should be stopped as root"
135 RETVAL=4 # insufficient privileges
138 # Cat the modprobe file, place all lines that follow a trailing backslash on the same line, and keep only lnet lines that set forwarding="enabled"
139 ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"`
140 if [[ ! -z ${ROUTER} ]]; then
141 MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet`
142 if [[ ! -z ${MODULE_LOADED} ]]; then
143 ${LCTL} network unconfigure
145 ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
146 # do it again, in case we tried to unload ksocklnd too early
147 ${LCTL} modules | awk '{ print $2 }' | xargs rmmod
150 ${LCONF} ${LCONF_STOP_ARGS}
155 if [ $RETVAL -eq 0 ]; then
156 echo "offline" >$STATUS
158 echo "offline pending" >$STATUS
169 # LSB compliance - return 3 if service is not running
170 # Lustre-specific returns
171 # 150 - partial startup
172 # 151 - health_check unhealthy
175 egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
177 # check for any routes - on a portals router this is the only sign that it is running
178 [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
180 # check for any configured devices (may indicate partial startup)
181 if [ -d /proc/fs/lustre ]; then
182 [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
184 # check for either a server or a client filesystem
185 MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
186 OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
187 LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
188 [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
190 # check if this is a router
191 if [ -d /proc/sys/lnet ]; then
192 ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`"
193 if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
200 # check for server disconnections
201 DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
202 [ "$DISCON" ] && STATE="disconnected" && RETVAL=0
204 # check for servers in recovery
205 [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery" && RETVAL=0
207 # check for error in health_check
208 HEALTH="/proc/fs/lustre/health_check"
209 [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=151
212 [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152
214 # If Lustre is up, check if the service really exists
215 # Skip this if we are not checking a specific service
216 if [ $RETVAL -eq 0 ] && [ $SERVICE != 'lustre' ]; then
217 DUMMY=$( $LCTL dl | grep "$SERVICE")
218 [ $? -ne 0 ] && STATE="not_found" && RETVAL=3
224 # See how we were called.
239 echo "Usage: $SERVICE {start|stop|restart|status}"