Whamcloud - gitweb
eeb5941380537c044e5176c906a9521566790ebd
[fs/lustre-release.git] / lustre / scripts / lustre
1 #!/bin/bash
2 #
3 # lustre        This shell script takes care of starting and stopping
4 #              the lustre services.
5 #
6 # chkconfig: - 60 20
7 # description:  Part of the lustre file system.
8 # probe: true
9 # config: /etc/sysconfig/lustre
10
11 PATH=/sbin:/usr/sbin:/bin:/usr/bin
12
13 # Source function library.
14 . /etc/rc.d/init.d/functions
15
16 # Source networking configuration.
17 if [ ! -f /etc/sysconfig/network ]; then
18         exit 0
19 fi
20
21 . /etc/sysconfig/network
22
23 LDEV=${LDEV:-"/usr/sbin/ldev"}
24 ZPOOL_LAYOUT=/usr/bin/zpool_layout
25 UDEVADM=${UDEVADM:-/sbin/udevadm}
26
27 # Check that networking is up.
28 [ "${NETWORKING}" = "no" ] && exit 0
29
30 # Check for and source configuration file otherwise set defaults
31 [ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
32 FSCK_ARGS=${FSCK_ARGS:-""}
33 MOUNT_OPTIONS=${MOUNT_OPTIONS:-""}
34 LOCAL_SRV=${LOCAL_SRV:-"`$LDEV -l 2>/dev/null`"}
35 FOREIGN_SRV=${FOREIGN_SRV:-"`$LDEV -f 2>/dev/null`"}
36 REQUIRE_MMP_FEATURE=${REQUIRE_MMP_FEATURE:-${FOREIGN_SRV:+"yes"}}
37 LOCAL_MOUNT_DIR=${LOCAL_MOUNT_DIR:-"/mnt/lustre/local"}
38 FOREIGN_MOUNT_DIR=${FOREIGN_MOUNT_DIR:-"/mnt/lustre/foreign"}
39 SETUP_DEVICES=${SETUP_DEVICES:-""}
40 ZPOOL_LAYOUT_BUSES=${ZPOOL_LAYOUT_BUSES:-""}
41 ZPOOL_LAYOUT_PORTS=${ZPOOL_LAYOUT_PORTS:-""}
42 ZPOOL_LAYOUT_MAP=${ZPOOL_LAYOUT_MAP:-""}
43 MOUNT_DELAY=${MOUNT_DELAY:-2}
44 LOAD_ZFS=${LOAD_ZFS:-""}
45
# Locate tune2fs and parallel fsck, preferring the ldiskfs-specific
# variants when installed.  Either may be pre-set via the environment
# or /etc/sysconfig/lustre.  `command -v` replaces the non-portable
# external `which`.
if [ -z "$TUNE2FS" ]; then
        TUNE2FS=$(command -v tunefs.ldiskfs 2>/dev/null)
        if [ -z "$TUNE2FS" ]; then
                TUNE2FS=$(command -v tune2fs 2>/dev/null)
        fi
fi

if [ -z "$PFSCK" ]; then
        PFSCK=$(command -v pfsck.ldiskfs 2>/dev/null)
        if [ -z "$PFSCK" ]; then
                PFSCK=$(command -v fsck 2>/dev/null)
        fi
fi

# Expand unmatched globs to nothing rather than the literal pattern
# (health_check temporarily toggles and restores this).
shopt -s nullglob
61
# Optionally lay out zpool device aliases (when ZPOOL_LAYOUT_BUSES and
# ZPOOL_LAYOUT_PORTS are configured) and load the zfs module when
# LOAD_ZFS=yes.  Exits the script if the module fails to load.
start_zfs_services ()
{
        local map_opt=""

        if [ -n "$ZPOOL_LAYOUT_BUSES" ] && [ -n "$ZPOOL_LAYOUT_PORTS" ]; then
                [ -n "$ZPOOL_LAYOUT_MAP" ] && map_opt="-m $ZPOOL_LAYOUT_MAP"
                $ZPOOL_LAYOUT -t -b "$ZPOOL_LAYOUT_BUSES" \
                        -p "$ZPOOL_LAYOUT_PORTS" $map_opt
        fi
        if [ "$LOAD_ZFS" = "yes" ] && ! modprobe zfs; then
                echo "Failed to load zfs module.  Aborting."
                exit 1
        fi
}
74
# stop_devices <label> [<label>...]
# Shut down the backing storage for each label: export the zpool for
# zfs targets, stop the md array (and its journal array) for md targets.
stop_devices ()
{
        local label devtype dev journal

        for label in "$@"; do
                devtype=$($LDEV -t $label)
                case "$devtype" in
                zfs)
                        export_zpool $label
                        ;;
                md)
                        dev=$(label_to_device $label)
                        journal=$($LDEV -j $label)
                        stop_md_device $dev
                        stop_md_device $journal
                        ;;
                esac
        done
}
91
# import_zpool <label>
# Import the zpool backing <label> unless it is already imported.
# Returns 0 on success or if already imported, otherwise the zpool
# exit status (complains on stdout in that case).
import_zpool ()
{
        local label=$1
        local pool=$($LDEV -z $label)
        local cache=$($LDEV -r $label)
        local args="-N $ZPOOL_IMPORT_ARGS"
        local result=1

        # zpool import accepts a cachefile (-c) or a search directory
        # (-d) but not both; prefer the per-label cachefile.
        if [ -n "$cache" ]; then
                args="$args -c $cache"
        elif [ -n "$ZPOOL_IMPORT_DIR" ]; then
                args="$args -d $ZPOOL_IMPORT_DIR"
        fi

        if zpool status $pool >/dev/null 2>&1; then
                # already imported
                result=0
        elif [ -n "$pool" ]; then
                zpool import $pool $args 2>/dev/null
                result=$?
        fi

        if [ $result -ne 0 ]; then
                echo "Unexpected return code from import of pool $pool: $result"
        fi
        return $result
}
117
# export_zpool <label>
# Export the zpool backing <label>; errors are silently ignored.
export_zpool ()
{
        local pool

        pool=$($LDEV -z $1)
        zpool export $pool 2>/dev/null
}
124
# Trigger udev to re-examine block devices and wait for event
# processing to settle; falls back to the legacy udevtrigger /
# udevsettle tools when udevadm is not executable.
udev_trigger()
{
        if [ ! -x ${UDEVADM} ]; then
                /sbin/udevtrigger
                /sbin/udevsettle
                return
        fi
        ${UDEVADM} trigger --action=change --subsystem-match=block
        ${UDEVADM} settle
}
136
# Usage: run_preexec_check [ start | restart | condrestart ]
# Run the optional PREEXEC_CHECK command, then the optional
# PREEXEC_SCRIPT hook (which receives the action as its argument).
# Aborts the whole script with status 1 if either fails.
run_preexec_check ()
{
        local action=$1

        if [ -n "$PREEXEC_CHECK" ]; then
                $PREEXEC_CHECK || {
                        echo "Pre-exec check \"$PREEXEC_CHECK\" failed.  Aborting."
                        exit 1
                }
        fi

        if [ -n "$PREEXEC_SCRIPT" ]; then
                "$PREEXEC_SCRIPT" "$action" || {
                        echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed.  Aborting."
                        exit 1
                }
        fi
}
151
# Usage: run_postexec_check [ start | restart | condrestart ]
# The single parameter will be passed to the POSTEXEC_SCRIPT.
# Runs the optional POSTEXEC_CHECK command and POSTEXEC_SCRIPT hook;
# aborts the whole script with status 1 if either fails.
run_postexec_check ()
{
        if [ -n "$POSTEXEC_CHECK" ] && ! $POSTEXEC_CHECK ; then
                echo "Post-exec check \"$POSTEXEC_CHECK\" failed.  Aborting."
                exit 1
        fi

        if [ -n "$POSTEXEC_SCRIPT" ] && ! "$POSTEXEC_SCRIPT" "$1" ; then
                echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed.  Aborting."
                exit 1
        fi
}
166
# Usage: adjust_scsi_timeout <dev>
# When SCSI_DEVICE_TIMEOUT is configured, write it to the sysfs
# timeout attribute of <dev> — but only for devices whose driver is
# "sd".  Returns 1 only if the write itself fails.
adjust_scsi_timeout ()
{
        local dev=$1
        local name sysfs_timeout driver

        [ -z "$SCSI_DEVICE_TIMEOUT" ] && return 0

        # make sure that it is actually a SCSI (sd) device
        name=$(basename $dev)
        sysfs_timeout=/sys/block/${name}/device/timeout
        driver=$(readlink /sys/block/${name}/device/driver)
        if [ -n "$driver" ] && [ "$(basename $driver)" = "sd" ]; then
                if ! echo $SCSI_DEVICE_TIMEOUT >$sysfs_timeout; then
                        echo "FAILED: could not adjust ${dev} timeout"
                        return 1
                fi
        fi
        return 0
}
186
# Usage: fsck_test <dev> [ <dev> ... ]
# Checks all listed devices in parallel if FSCK_ARGS is set.
# Returns 0 on success (or nothing to do), 1 on fsck failure or if
# the parallel fsck tool is missing.
fsck_test ()
{
        local devices="$*"
        local rc

        # Filter out non-absolute paths, which are probably ZFS datasets
        devices=$(echo $devices | xargs -n 1 | grep '^/' | xargs)

        if [ -n "${FSCK_ARGS}" ] && [ -n "$devices" ]; then
                # quoting $PFSCK makes an empty value fail this test
                # instead of silently passing `[ -x ]`
                if [ ! -x "$PFSCK" ]; then
                        echo "$PFSCK not found"
                        return 1
                fi
                echo "$PFSCK $devices -- ${FSCK_ARGS}"
                $PFSCK $devices -- ${FSCK_ARGS}
                # capture the status immediately: the original printed
                # the [ ]-test's own $? in the FAILED message
                rc=$?
                # fsck status 1 only means errors were corrected
                if [ $rc -ne 0 ] && [ $rc -ne 1 ]; then
                        echo "FAILED: $PFSCK -- ${FSCK_ARGS}: $rc"
                        return 1
                fi
        fi
        return 0
}
211
# Usage: test_feature_flag <dev> <flag>
# Return 0 if the filesystem feature <flag> is listed by tune2fs for
# <dev>, 1 otherwise (including when tune2fs output is unreadable).
test_feature_flag()
{
        local dev=$1
        local flag=$2
        local f

        # the "features:" line lists one feature per word
        for f in $($TUNE2FS -l $dev 2>/dev/null |
                   grep features: | sed -e 's/^.*: //'); do
                [ "$f" = "$flag" ] && return 0
        done

        return 1
}
230
# Usage: mmp_test <dev>
# Returns 0 if the mmp feature is set or not required, 1 if it is
# required but unset (or tune2fs is unavailable).
mmp_test ()
{
        local dev=$1

        [ "$REQUIRE_MMP_FEATURE" = "yes" ] || return 0

        if [ ! -x $TUNE2FS ]; then
                echo "$TUNE2FS not found"
                return 1
        fi
        if ! test_feature_flag $dev "mmp"; then
                echo "mmp feature flag is not set on $dev"
                return 1
        fi
        return 0
}
252
# Usage: label_to_mountpt <label>
# Print the mount point path for <label> when it matches a local or
# foreign server of this node; print nothing otherwise.
label_to_mountpt ()
{
        local label=$1
        local s

        for s in $LOCAL_SRV; do
                [ "$s" = "$label" ] || continue
                echo "$LOCAL_MOUNT_DIR/$label"
                return
        done
        for s in $FOREIGN_SRV; do
                [ "$s" = "$label" ] || continue
                echo "$FOREIGN_MOUNT_DIR/$label"
                return
        done
}
273
# Usage: label_to_device <label>
# Print the canonical device path for <label>: prefer the
# /dev/disk/by-label symlink, fall back to the ldev.conf entry.
label_to_device ()
{
        local link=/dev/disk/by-label/$1

        if [ ! -h $link ]; then
                $LDEV -d $1
        else
                readlink --canonicalize $link
        fi
}
287
288 # helper for mountpt_is_active() and device_is_active()
289 declare -r awkprog='BEGIN {rc = 1;}
290                         { if ($field == path) {rc = 0;} }
291                     END { exit rc;}'
292
# Usage: mountpt_is_active <label>
# Return 0 if the label's mount point appears in /proc/mounts,
# 1 if not mounted or the label is invalid on this node.
mountpt_is_active ()
{
        local dir

        dir=$(label_to_mountpt $1)
        [ -n "$dir" ] || return 1
        # pass /proc/mounts directly to awk (no useless cat) and quote
        # $dir so an odd mount path cannot split the awk arguments
        awk "$awkprog" field=2 path="$dir" /proc/mounts
}
306
# Usage: device_is_active <label>
# Return 0 if the label's backing device appears in /proc/mounts,
# 1 if not mounted or the label is invalid on this node.
device_is_active ()
{
        local dev

        dev=$(label_to_device $1)
        [ -n "$dev" ] || return 1
        # BUGFIX: match against $dev (column 1 of /proc/mounts); the
        # original passed path=$dir, an unset variable here, so the
        # device check could never succeed
        awk "$awkprog" field=1 path="$dev" /proc/mounts
}
320
# Usage: mount_one_device <label> <successflag> [devtype]
# Mount a single lustre target.  On any failure the <successflag>
# file is removed so the caller can detect errors after a batch of
# parallel mounts.
mount_one_device ()
{
        local label=$1
        local successflag=$2
        local devtype=$3
        local dev=$(label_to_device $label)
        local dir=$(label_to_mountpt $label)

        # $dir and $dev have already been validated by the caller
        if [ ! -d "$dir" ] && ! mkdir -p "$dir"; then
                rm -f "$successflag"
                return
        fi
        echo "Mounting $dev on $dir"
        # MOUNT_OPTIONS stays unquoted on purpose: it may hold several
        # whitespace-separated mount arguments
        if ! mount -t lustre $MOUNT_OPTIONS "$dev" "$dir"; then
                rm -f "$successflag"
                return
        fi
}
342
# Usage: assemble_md_device <device> [raidtab]
# Assemble the md array backing <device> unless it is already active,
# then retrigger udev.  Return 0 if the array is active or assembled
# successfully, otherwise mdadm's exit code.
assemble_md_device ()
{
        local dev=$1
        local raidtab=$2
        local opts="-Aq"
        local rc=0

        [ -n "$raidtab" ] && opts="$opts -c $raidtab"

        if ! md_array_is_active $dev; then
                mdadm $opts $dev
                rc=$?
        fi

        udev_trigger
        return $rc
}
366
# Usage: stop_md_device <device> [raidtab]
# Stop the md array <device> if it exists and is active.
# Return 0 if stopped or nothing to do, otherwise mdadm's exit code.
stop_md_device ()
{
        local dev=$1
        local raidtab=$2
        local opts="-Sq"
        local rc=0

        [ -n "$raidtab" ] && opts="$opts -c $raidtab"

        if [ -e $dev ] && md_array_is_active $dev; then
                mdadm $opts $dev
                rc=$?
        fi

        return $rc
}
389
# Usage: md_array_is_active <device>
# Return 0 if <device> exists and mdadm does not report it as
# inactive/absent (mdadm --detail -t exit code 4), 1 otherwise.
md_array_is_active ()
{
        local device=$1

        [ -e "$device" ] || return 1

        mdadm --detail -t $device > /dev/null 2>&1
        [ $? -ne 4 ]
}
404
# Usage: start_services <label> [ <label> ... ]
# fsck and mount any devices listed as arguments (in parallel).
# Attempt to assemble software raid arrays or zfs pools backing
# Lustre devices.
# Returns 0 on success, 2 if any label failed validation, assembly,
# fsck or mount.
start_services ()
{
        local result=0
        local devices=""
        local dir dev label
        local successflag
        local labels
        # NOTE(review): devtype/journal/raidtab below are assigned
        # without `local` and leak as globals — confirm intended.

        start_zfs_services
        for label in $*; do
                dir=`label_to_mountpt $label`
                devtype=`$LDEV -t $label`
                dev=`label_to_device $label`
                journal=`$LDEV -j $label`
                raidtab=`$LDEV -r $label`

                # a label must resolve to both a mount point and a device
                if [ -z "$dir" ] || [ -z "$dev" ]; then
                        echo "$label is not a valid lustre label on this node"
                        result=2
                        continue
                fi

                # bring up the backing storage first (md array or zpool)
                if [ "$devtype" = "md" ] ; then
                        if ! assemble_md_device $dev $raidtab ; then
                                echo "failed to assemble array $dev backing $label"
                                result=2
                                continue
                        fi
                elif [ "$devtype" = "zfs" ] ; then
                        if ! import_zpool $label ; then
                                result=2
                        fi
                fi

                # Journal device field in ldev.conf may be "-" or empty,
                # so only attempt to assemble if its an absolute path.
                # Ignore errors since the journal device may not be an
                # md device.
                if echo $journal | grep -q ^/ ; then
                        assemble_md_device $journal $raidtab 2>/dev/null
                fi

                # ldiskfs-only checks: skip already-mounted targets,
                # then verify the mmp feature and scsi timeout before
                # queueing the device for fsck + mount
                if [ "x$devtype" != "xzfs" ] ; then
                        if mountpt_is_active $label || \
                           device_is_active $label; then
                                echo "$label is already mounted"
                                # no error
                                continue
                        fi
                        if ! mmp_test $dev; then
                                result=2
                                continue
                        fi
                        if ! adjust_scsi_timeout $dev; then
                                result=2
                                continue
                        fi
                fi
                devices="$devices $dev"
                labels="$labels $label"
        done
        if [ $result == 0 ]; then
                fsck_test $devices || return 2

                # Fork to handle multiple mount_one_device()'s in parallel.
                # Errors occurred if $successflag comes up missing afterwards.
                successflag=`mktemp`
                [ -e $successflag ] || return 2
                for label in $labels; do
                        mount_one_device $label $successflag `$LDEV -t $label` &
                        # stagger to avoid module loading races
                        if [[ -n $MOUNT_DELAY && $MOUNT_DELAY -gt 0 ]] ; then
                                sleep $MOUNT_DELAY
                        fi
                done
                # one bare `wait` per launched job reaps them all
                for label in $labels; do
                        wait
                done
                [ -e $successflag ] || return 2
                rm -f $successflag
        fi

        return $result
}
493
# Usage: stop_services <label> [ <label> ... ]
# Unmount any devices listed as arguments (umounts run in parallel,
# then all are waited for).
# Any devices which are not mounted or don't exist are skipped with no error.
# Returns 0 on success, 2 if any label was invalid or failed to unmount.
stop_services ()
{
        local labels=$*
        local result=0
        local pids=""
        local dir dev label

        for label in $labels; do
                dir=`label_to_mountpt $label`
                if [ -z "$dir" ]; then
                        echo "$label is not a valid lustre label on this node"
                        result=2
                        continue
                fi
                if ! mountpt_is_active $label; then
                        #echo "$label is not mounted"
                        # no error
                        continue
                fi

                echo "Unmounting $dir"
                umount $dir &

                # remember each background umount's pid so its exit
                # status can be collected below
                if [ -z "$pids" ]; then
                        pids="$!"
                else
                        pids="$pids $!"
                fi
        done

        # wait for all umount processes to complete, report any errors
        for pid in $pids; do
                wait $pid || result=2
        done

        # double check!
        for label in $labels; do
                if mountpt_is_active $label; then
                        dir=`label_to_mountpt $label`
                        echo "Mount point $dir is still active"
                        result=2
                fi
                if device_is_active $label; then
                        dev=`label_to_device $label`
                        echo "Device $dev is still active"
                        result=2
                fi
        done
        # finally shut down the backing md arrays / zpools
        stop_devices $labels

        return $result
}
549
# Usage: start_lustre_services [local|foreign|all|<label>]
# If no parameter is specified, local devices will be started.
# Exits with status 2 on failure.
start_lustre_services ()
{
        local labels

        case "$1" in
        foreign)
                labels=$FOREIGN_SRV
                ;;
        all)
                labels="$LOCAL_SRV $FOREIGN_SRV"
                ;;
        ""|local)
                labels=$LOCAL_SRV
                ;;
        *)
                labels="$1"
                ;;
        esac
        # for use by heartbeat V1 resource agent:
        # starting an already-started service must not be an error
        start_services $labels || exit 2
}
572
# Usage: stop_lustre_services [local|foreign|all|<label>]
# If no parameter is specified all devices will be stopped.
# Exits with status 2 on failure.
stop_lustre_services ()
{
        local labels

        case "$1" in
        local)
                labels=$LOCAL_SRV
                ;;
        foreign)
                labels=$FOREIGN_SRV
                ;;
        ""|all)
                labels="$LOCAL_SRV $FOREIGN_SRV"
                ;;
        *)
                labels="$1"
                ;;
        esac
        # for use by heartbeat V1 resource agent:
        # stopping already-stopped service must not be an error
        stop_services $labels || exit 2
}
594
# General lustre health check - not device specific.
# Prints a one-word state on stdout and returns:
#   3   - stopped/loaded (LSB "service not running")
#   150 - partial startup
#   0   - running / recovery / disconnected
#   1   - unhealthy
#   152 - LBUG
health_check ()
{
        # nullglob would make the /proc/fs/lustre/* globs below vanish
        # silently; disable it here and restore on exit
        old_nullglob="`shopt -p nullglob`"
        shopt -u nullglob

        STATE="stopped"
        # LSB compliance - return 3 if service is not running
        # Lustre-specific returns
        # 150 - partial startup
        # 151 - health_check unhealthy
        # 152 - LBUG
        # NOTE(review): the comment says 151 for unhealthy but the code
        # below sets RETVAL=1 — confirm which value is intended.
        RETVAL=3
        egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"

        # check for any configured devices (may indicate partial startup)
        if [ -d /proc/fs/lustre ]; then
                if [ -n "`cat /proc/fs/lustre/devices 2> /dev/null`" ] ; then
                        STATE="partial"
                        RETVAL=150
                fi

                # check for either a server or a client filesystem
                MDT="`ls /proc/fs/lustre/mdt/*/recovery_status 2> /dev/null`"
                OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status \
                        2> /dev/null`"
                LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
                if [ "$MDT" -o "$OST" -o "$LLITE" ]; then
                        STATE="running"
                        RETVAL=0
                fi
        else
                # check if this is a router
                if [ -d /proc/sys/lnet ]; then
                        ROUTER="`cat /proc/sys/lnet/routes | head -1 |
                                grep -i -c \"Routing enabled\"`"
                        if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
                                STATE="running"
                                RETVAL=0
                        fi
                fi
        fi

        # check for server disconnections
        DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
        if [ -n "$DISCON" ] ; then
                STATE="disconnected"
                RETVAL=0
        fi

        # check for servers in recovery
        if [ -n "$MDT$OST" ] && grep -q RECOV $MDT $OST ; then
                STATE="recovery"
                RETVAL=0
        fi

        # check for error in health_check
        HEALTH="/proc/fs/lustre/health_check"
        if [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH ; then
                STATE="unhealthy"
                RETVAL=1
        fi

        # check for LBUG
        if [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH ; then
                STATE="LBUG"
                RETVAL=152
        fi

        echo $STATE
        # restore the caller's nullglob setting
        eval $old_nullglob
        return $RETVAL
}
668
# Usage: status [local|foreign|all|<label>]
# If no parameter is specified, general lustre health status will be reported.
# Exits 0 if anything is running, 3 otherwise.
status ()
{
        local labels=""
        local label dev
        local valid_devs=0

        case "$1" in
        local)
                labels=$LOCAL_SRV
                ;;
        foreign)
                labels=$FOREIGN_SRV
                ;;
        all)
                labels="$LOCAL_SRV $FOREIGN_SRV"
                ;;
        "")
                # ASSUMPTION: this is not the heartbeat res agent
                health_check
                exit $?
                ;;
        *)
                labels=$1
                ;;
        esac
        # for use by heartbeat V1 resource agent:
        # print "running" if *anything* is running.
        for label in $labels; do
                dev=$(label_to_device $label)
                if [ -z "$dev" ]; then
                        echo "$label is not a valid lustre label on this node"
                        # no error
                        continue
                fi
                valid_devs=1
                if mountpt_is_active $label || device_is_active $label; then
                        echo "running"
                        exit 0
                fi
        done
        [ $valid_devs = 1 ] && echo "stopped"
        exit 3
}
710
# Print command-line help on stdout and exit with status 1.
usage ()
{
        printf '%s\n' \
                'Usage: lustre {start|stop|status|restart|reload|condrestart}' \
                '' \
                '       lustre start  [local|foreign|<label>]' \
                '       lustre stop   [local|foreign|<label>]' \
                '       lustre status [local|foreign|<label>]'
        exit 1
}
722
# See how we were called.
# Each action takes at most one optional selector argument
# (local|foreign|all|<label>); see usage().
case "$1" in
  start)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        run_preexec_check "start"
        start_lustre_services $2
        run_postexec_check "start"
        ;;
  stop)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        run_preexec_check "stop"
        stop_lustre_services $2
        run_postexec_check "stop"
        ;;
  status)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        status $2
        ;;
  restart)
        # full stop then start via re-exec of this script
        $0 stop
        $0 start
        ;;
  reload)
        # nothing to reload
        ;;
  probe)
        # chkconfig probe hook - nothing to do
        ;;
  condrestart)
        # restart only if any lustre filesystem is currently mounted
        if grep lustre /proc/mounts ; then
                $0 stop
                $0 start
        fi
        ;;
  *)
        usage
esac

exit 0