Whamcloud - gitweb
0e3acba07cf72df7b8fd0a9a43a7c86efbb66613
[fs/lustre-release.git] / lustre / scripts / lustre
1 #!/bin/sh
2 #
3 # lustre   This shell script takes care of starting and stopping Lustre
4 #
5 # chkconfig: - 99 1
6 # description: Lustre Lite network File System.
7 #              This starts both Lustre client and server functions.
8 # processname: lconf
9 # config: /etc/lustre/config.xml
10 # pidfile: /var/run/lustre.pid
11 ### BEGIN INIT INFO
12 # Provides: lustre
13 # Required-Start: $network +sshd
14 # Required-Stop: $network
15 # Should-Start:
16 # Should-Stop:
17 # Default-Start: 
18 # Default-Stop: 0 1 2 3 4 5 6
19 # Short-Description: Lustre Lite network File System.
20 # Description: This starts both Lustre client and server functions.
21 ### END INIT INFO
22
23
# Name this script was invoked as; selects whole-node vs. per-service mode.
SERVICE=${0##*/}

# Optional site configuration; either file may pre-set any variable below.
: "${LUSTRE_CFG:=/etc/lustre/lustre.cfg}"
[ -f "${LUSTRE_CFG}" ] && . "${LUSTRE_CFG}"
[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre

: "${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}"
: "${LCONF:=/usr/sbin/lconf}"
: "${LCTL:=/usr/sbin/lctl}"
# Some distros use modprobe.conf.local
if [ -f /etc/modprobe.conf.local ]; then
        : "${MODPROBE_CONF:=/etc/modprobe.conf.local}"
else
        : "${MODPROBE_CONF:=/etc/modprobe.conf}"
fi
# Be sure the proper directories are in PATH.
export PATH="/sbin:$PATH"

# HOSTNAME is provided automatically by bash only; fall back to uname so the
# per-service --select argument is not left empty under other /bin/sh shells.
: "${HOSTNAME:=$(uname -n)}"

case "$SERVICE" in
    [SK][[:digit:]][[:digit:]]lustre | lustre)
        # Invoked as the generic init script (directly or via an rc symlink).
        SERVICE="lustre"
        : "${LCONF_START_ARGS:=${LUSTRE_CONFIG_XML}}"
        : "${LCONF_STOP_ARGS:=--force --cleanup ${LUSTRE_CONFIG_XML}}"
        ;;
    *)
        # Invoked under another name: treat that name as the service group.
        : "${LCONF_START_ARGS:=--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}}"
        : "${LCONF_STOP_ARGS:=--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}}"
        ;;
esac
LOCK=/var/lock/subsys/$SERVICE

# Source function library.
if [ -f /etc/init.d/functions ]; then
        . /etc/init.d/functions
fi

# Source networking configuration.
if [ -f /etc/sysconfig/network ]; then
        . /etc/sysconfig/network
fi
64
#
# check_start_stop - common preconditions for start and stop.
#
# Exits the script with an LSB status code when a precondition fails:
#   7 - NETWORKING is "no"
#   5 - ${LCONF} or ${LCTL} is not executable
#   6 - ${LUSTRE_CONFIG_XML} is an absolute path that is not a regular file
# Otherwise makes sure the status directory exists and sets STATUS to the
# sysStatus file inside it.
#
# Globals read:    NETWORKING, LCONF, LCTL, LUSTRE_CONFIG_XML
# Globals written: STATUS_DIR (defaults to /var/lustre), STATUS
#
check_start_stop() {
        # Check that networking is up. - exit 'not running'
        [ "${NETWORKING}" = "no" ] && exit 7

        # exit 'not installed'
        if [ ! -x "${LCONF}" ] || [ ! -x "${LCTL}" ]; then
                exit 5
        fi

        # Only an absolute path can be checked on disk; anything else is
        # passed through untouched.  A case pattern is used instead of the
        # bash-only ${var:0:1} substring expansion so this works in any sh.
        case "${LUSTRE_CONFIG_XML}" in
            /*)
                if [ ! -f "${LUSTRE_CONFIG_XML}" ]; then
                        echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping."
                        # exit 'not configured'
                        exit 6
                fi
                ;;
        esac

        # Create /var/lustre directory
        # This is used by snmp agent for checking lustre services
        #    status online/offline/online pending/offline pending.
        : "${STATUS_DIR:=/var/lustre}"
        [ -d "${STATUS_DIR}" ] || mkdir -p "${STATUS_DIR}"
        STATUS=${STATUS_DIR}/sysStatus
}
88
#
# start - bring up Lustre, or LNET routing only, on this node.
#
# If ${MODPROBE_CONF} configures lnet with forwarding="enabled" the node is
# treated as a router: lnet is loaded and "lctl network configure" is run.
# Otherwise ${LCONF} is run with ${LCONF_START_ARGS}.
#
# Globals read:    SERVICE, MODPROBE_CONF, LCTL, LCONF, LCONF_START_ARGS,
#                  LOCK, STATUS (set by check_start_stop)
# Globals written: ROUTER, RETVAL
#                    6 - refused: clustat is installed and the override
#                        file /etc/lustre/start-despite-clumanager is absent
#                    4 - not running as root
#                    otherwise the exit status of the configure command
# Side effects:    on success touches ${LOCK} and writes "online" to
#                  ${STATUS}; on failure writes "online pending".
#
start() {
        if [ -x "/usr/sbin/clustat" ] && [ "${SERVICE}" = "lustre" ] &&
           [ ! -f "/etc/lustre/start-despite-clumanager" ]; then
                cat >&2 <<EOF
This script was run directly, which can be dangerous if you are using
clumanager to manage Lustre services.

If you are not using clumanager for Lustre services, run the following
command to have this script start Lustre instead:

touch /etc/lustre/start-despite-clumanager
EOF
                RETVAL=6  # program not configured
                return
        fi

        check_start_stop
        printf 'Starting %s: ' "$SERVICE"
        # id -u instead of $UID: UID is only set automatically by bash.
        if [ "$(id -u)" -ne 0 ]; then
                echo "Lustre should be started as root"
                RETVAL=4 # insufficient privileges
                return
        fi

        # Join every line that ends in a backslash (plus optional trailing
        # whitespace) with the line that follows it, then look for an lnet
        # option line enabling forwarding.  A missing modprobe file simply
        # means "not a router", so sed's complaint is discarded.
        ROUTER=$(sed -e ':a' -e '$!N' -e '$!ba' -e 's#\\[[:space:]]*\n##g' \
                        "${MODPROBE_CONF}" 2>/dev/null |
                 grep lnet | grep 'forwarding="enabled"')

        if [ -n "${ROUTER}" ]; then
                modprobe lnet
                "${LCTL}" network configure
        else
                # LCONF_START_ARGS is deliberately unquoted: it holds
                # several whitespace-separated arguments.
                "${LCONF}" ${LCONF_START_ARGS}
        fi
        RETVAL=$?

        echo "$SERVICE"
        if [ "$RETVAL" -eq 0 ]; then
                touch "$LOCK"
                echo "online" >"$STATUS"
        else
                echo "online pending" >"$STATUS"
        fi
}
129
#
# stop - shut down Lustre, or LNET routing only, on this node.
#
# If ${MODPROBE_CONF} configures lnet with forwarding="enabled" the node is
# treated as a router: the network is unconfigured (when lnet is loaded) and
# the modules listed by "lctl modules" are removed.  Otherwise ${LCONF} is
# run with ${LCONF_STOP_ARGS}.
#
# Globals read:    SERVICE, MODPROBE_CONF, LCTL, LCONF, LCONF_STOP_ARGS,
#                  LOCK, STATUS (set by check_start_stop)
# Globals written: ROUTER, RETVAL
#                    4 - not running as root
#                    otherwise the exit status of the last teardown command
# Side effects:    removes ${LOCK}; writes "offline" to ${STATUS} on
#                  success, "offline pending" on failure.
#
stop() {
        check_start_stop
        printf 'Shutting down %s: ' "$SERVICE"
        # id -u instead of $UID: UID is only set automatically by bash.
        if [ "$(id -u)" -ne 0 ]; then
                echo "Lustre should be stopped as root"
                RETVAL=4 # insufficient privileges
                return
        fi

        # Join every line that ends in a backslash (plus optional trailing
        # whitespace) with the line that follows it, then look for an lnet
        # option line enabling forwarding.  A missing modprobe file simply
        # means "not a router", so sed's complaint is discarded.
        ROUTER=$(sed -e ':a' -e '$!N' -e '$!ba' -e 's#\\[[:space:]]*\n##g' \
                        "${MODPROBE_CONF}" 2>/dev/null |
                 grep lnet | grep 'forwarding="enabled"')

        if [ -n "${ROUTER}" ]; then
                # Only unconfigure the network when lnet is actually loaded.
                if lsmod | awk '$1 == "lnet" { found = 1 } END { exit !found }'; then
                        "${LCTL}" network unconfigure
                fi
                "${LCTL}" modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
                # do it again, in case we tried to unload ksocklnd too early
                "${LCTL}" modules | awk '{ print $2 }' | xargs rmmod
        else
                # LCONF_STOP_ARGS is deliberately unquoted: it holds several
                # whitespace-separated arguments.
                "${LCONF}" ${LCONF_STOP_ARGS}
        fi
        RETVAL=$?

        echo "$SERVICE"
        rm -f "$LOCK"
        if [ "$RETVAL" -eq 0 ]; then
                echo "offline" >"$STATUS"
        else
                echo "offline pending" >"$STATUS"
        fi
}
161
#
# restart - run stop, then start, unconditionally.
#
restart() {
        stop; start
}
166
#
# status - report the Lustre state of this node.
#
# Prints one word on stdout and leaves the matching code in RETVAL.  The
# checks run in order and a later match overrides an earlier one:
#   stopped       3    nothing detected (LSB "not running")
#   loaded        3    libcfs/lvfs/portals present in /proc/modules
#   running       0    routes configured, or an MDS/OST/client is present
#   partial       150  devices configured but no MDS/OST/client found
#   disconnected  0    some *server_uuid entry is not FULL
#   recovery      0    an MDS/OST recovery_status contains RECOV
#   unhealthy     151  health_check reports NOT HEALTHY
#   LBUG          152  health_check reports LBUG
#   not_found     3    Lustre is up but ${SERVICE} is not in "lctl dl"
#
# Globals read:    SERVICE, LCTL (falls back to lctl from PATH when unset)
# Globals written: STATE, RETVAL, MDS, OST, LLITE, DISCON, HEALTH
#
status() {
        STATE="stopped"
        # LSB compliance - return 3 if service is not running
        RETVAL=3
        # Start from a clean slate so values inherited from the environment
        # or a sourced config file cannot leak into the recovery check.
        MDS=
        OST=
        LLITE=

        grep -Eq "libcfs|lvfs|portals" /proc/modules && STATE="loaded"

        # check for any routes - on a portals router this is the only thing
        if [ -n "$(cat /proc/sys/lnet/routes 2>/dev/null)" ]; then
                STATE="running"
                RETVAL=0
        fi

        # check for any configured devices (may indicate partial startup)
        if [ -d /proc/fs/lustre ]; then
                if [ -n "$(cat /proc/fs/lustre/devices 2>/dev/null)" ]; then
                        STATE="partial"
                        RETVAL=150
                fi

                # check for either a server or a client filesystem
                MDS=$(ls /proc/fs/lustre/mds/*/recovery_status 2>/dev/null)
                OST=$(ls /proc/fs/lustre/obdfilter/*/recovery_status 2>/dev/null)
                LLITE=$(ls /proc/fs/lustre/llite/fs* 2>/dev/null)
                if [ -n "$MDS$OST$LLITE" ]; then
                        STATE="running"
                        RETVAL=0
                fi
        elif [ -d /proc/sys/lnet ]; then
                # check if this is a router
                if head -n 1 /proc/sys/lnet/routes 2>/dev/null |
                   grep -qi "Routing enabled"; then
                        STATE="running"
                        RETVAL=0
                fi
        fi

        # check for server disconnections
        DISCON=$(grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2>/dev/null)
        if [ -n "$DISCON" ]; then
                STATE="disconnected"
                RETVAL=0
        fi

        # check for servers in recovery
        # ($MDS and $OST are deliberately unquoted: each is a list of paths)
        if [ -n "$MDS$OST" ] && grep -q RECOV $MDS $OST; then
                STATE="recovery"
                RETVAL=0
        fi

        HEALTH="/proc/fs/lustre/health_check"
        if [ -f "$HEALTH" ]; then
                # check for error in health_check
                if grep -q "NOT HEALTHY" "$HEALTH"; then
                        STATE="unhealthy"
                        RETVAL=151
                fi
                # check for LBUG
                if grep -q "LBUG" "$HEALTH"; then
                        STATE="LBUG"
                        RETVAL=152
                fi
        fi

        # If Lustre is up, check if the service really exists.
        # Skip this if we are not checking a specific service.
        if [ "$RETVAL" -eq 0 ] && [ "$SERVICE" != "lustre" ]; then
                # Use the configured lctl, as start and stop do.
                if ! "${LCTL:-lctl}" dl | grep -q -- "$SERVICE"; then
                        STATE="not_found"
                        RETVAL=3
                fi
        fi

        echo "$STATE"
}
223
# Dispatch on the requested action; each handler leaves its result in RETVAL.
case "$1" in
  start | stop | restart)
        # The action name is also the name of the function implementing it.
        "$1"
        ;;
  status)
        status "$SERVICE"
        ;;
  *)
        echo "Usage: $SERVICE {start|stop|restart|status}"
        exit 1
        ;;
esac

exit $RETVAL