Whamcloud - gitweb
55c66013900d17f3af4f0010978fc149d2b4bbd5
[fs/lustre-release.git] / lustre / scripts / lustre
1 #!/bin/bash
2 #
3 # lustre        This shell script takes care of starting and stopping
4 #              the lustre services.
5 #
6 # chkconfig: - 60 20
7 # description:  Part of the lustre file system.
8 # probe: true
9 # config: /etc/sysconfig/lustre
10
11 PATH=/sbin:/usr/sbin:/bin:/usr/bin
12
13 # Source function library.
14 . /etc/rc.d/init.d/functions
15
16 # Source networking configuration.
17 if [ ! -f /etc/sysconfig/network ]; then
18         exit 0
19 fi
20
21 . /etc/sysconfig/network
22
23 LDEV=${LDEV:-"/usr/sbin/ldev"}
24 ZPOOL_LAYOUT=/usr/bin/zpool_layout
25 UDEVADM=${UDEVADM:-/sbin/udevadm}
26
27 # Check that networking is up.
28 [ "${NETWORKING}" = "no" ] && exit 0
29
30 # Check for and source configuration file otherwise set defaults
31 [ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
32 FSCK_ARGS=${FSCK_ARGS:-""}
33 MOUNT_OPTIONS=${MOUNT_OPTIONS:-""}
34 LOCAL_SRV=${LOCAL_SRV:-"`$LDEV -l 2>/dev/null`"}
35 FOREIGN_SRV=${FOREIGN_SRV:-"`$LDEV -f 2>/dev/null`"}
36 REQUIRE_MMP_FEATURE=${REQUIRE_MMP_FEATURE:-${FOREIGN_SRV:+"yes"}}
37 LOCAL_MOUNT_DIR=${LOCAL_MOUNT_DIR:-"/mnt/lustre/local"}
38 FOREIGN_MOUNT_DIR=${FOREIGN_MOUNT_DIR:-"/mnt/lustre/foreign"}
39 SETUP_DEVICES=${SETUP_DEVICES:-""}
40 ZPOOL_LAYOUT_BUSES=${ZPOOL_LAYOUT_BUSES:-""}
41 ZPOOL_LAYOUT_PORTS=${ZPOOL_LAYOUT_PORTS:-""}
42 ZPOOL_LAYOUT_MAP=${ZPOOL_LAYOUT_MAP:-""}
43 MOUNT_DELAY=${MOUNT_DELAY:-2}
44 LOAD_ZFS=${LOAD_ZFS:-""}
45
# Locate the tool used to query ldiskfs feature flags: prefer the
# lustre-patched tunefs.ldiskfs, fall back to stock tune2fs.
# 'command -v' replaces the non-portable external 'which'.
if [ -z "$TUNE2FS" ] ; then
        TUNE2FS=$(command -v tunefs.ldiskfs 2>/dev/null)
        if [ -z "$TUNE2FS" ] ; then
                TUNE2FS=$(command -v tune2fs 2>/dev/null)
        fi
fi
52
# Locate the fsck binary used for the parallel pre-mount check: prefer the
# lustre-patched pfsck.ldiskfs, fall back to stock fsck.
# 'command -v' replaces the non-portable external 'which'.
if [ -z "$PFSCK" ] ; then
        PFSCK=$(command -v pfsck.ldiskfs 2>/dev/null)
        if [ -z "$PFSCK" ] ; then
                PFSCK=$(command -v fsck 2>/dev/null)
        fi
fi

# Unmatched globs expand to nothing (relied upon by the /proc scans below).
shopt -s nullglob
61
# Bring up ZFS prerequisites: optionally run zpool_layout to create device
# aliases, and load the zfs kernel module when LOAD_ZFS=yes.  Aborts the
# whole script if the module cannot be loaded.
start_zfs_services ()
{
        if [ -n "$ZPOOL_LAYOUT_BUSES" ] && [ -n "$ZPOOL_LAYOUT_PORTS" ]; then
                # -m is only passed when a layout map was configured
                MAP_ARG=${ZPOOL_LAYOUT_MAP:+"-m $ZPOOL_LAYOUT_MAP"}
                $ZPOOL_LAYOUT -t -b "$ZPOOL_LAYOUT_BUSES" \
                        -p "$ZPOOL_LAYOUT_PORTS" $MAP_ARG
        fi
        if [ "$LOAD_ZFS" = "yes" ]; then
                if ! modprobe zfs; then
                        echo "Failed to load zfs module.  Aborting."
                        exit 1
                fi
        fi
}
74
# Usage: stop_devices <label> [ <label> ... ]
# Release the backing storage for each label: export the zpool for zfs
# targets, stop the md array (and its external journal array) for md
# targets.  Other device types are left untouched.
stop_devices ()
{
        local labels=$*
        # declare dev/journal local too so they do not leak into the
        # caller's scope (they were implicitly global before); the unused
        # 'result' variable has been dropped.
        local label devtype dev journal

        for label in $labels; do
                devtype=`$LDEV -t $label`
                if [ "$devtype" = "zfs" ] ; then
                        export_zpool $label
                elif [ "$devtype" = "md" ] ; then
                        dev=`label_to_device $label`
                        journal=`$LDEV -j $label`
                        stop_md_device $dev
                        stop_md_device $journal
                fi
        done
}
92
# Usage: import_zpool <label>
# Import the zpool backing <label> unless it is already imported.
# Returns 0 on success or if already imported, non-zero otherwise.
import_zpool ()
{
        local label=$1
        local pool=`$LDEV -z $label`
        local cache=`$LDEV -r $label`
        local args="-N $ZPOOL_IMPORT_ARGS"

        # 'zpool import' forbids mixing -c (cache file) with -d (search
        # directory), so choose exactly one discovery source, in order of
        # preference.
        if [ -n "$cache" ]; then
                args="$args -c $cache"
        elif [ -n "$ZPOOL_IMPORT_DIR" ]; then
                args="$args -d $ZPOOL_IMPORT_DIR"
        elif [ -d "/dev/disk/by-vdev" ]; then
                args="$args -d /dev/disk/by-vdev"
        elif [ -d "/dev/mapper" ]; then
                args="$args -d /dev/mapper"
        fi

        # Already imported?  Nothing to do.
        if zpool status $pool >/dev/null 2>&1; then
                return 0
        fi
        if [ -n "$pool" ]; then
                zpool import $pool $args 2>/dev/null
                return $?
        fi
        return 1
}
119
# Usage: export_zpool <label>
# Export the zpool backing <label>; failures are silently ignored.
export_zpool ()
{
        local label=$1
        local pool

        pool=`$LDEV -z $label`
        zpool export $pool 2>/dev/null
}
126
# Trigger udev and wait for it to settle.
# Uses udevadm when available, otherwise the legacy udevtrigger/udevsettle
# pair.
udev_trigger()
{
        if [ ! -x ${UDEVADM} ]; then
                /sbin/udevtrigger
                /sbin/udevsettle
                return
        fi
        ${UDEVADM} trigger --action=change --subsystem-match=block
        ${UDEVADM} settle
}
138
# Usage: run_preexec_check [ start | restart | condrestart ]
# Run the optional PREEXEC_CHECK command and PREEXEC_SCRIPT hook; the
# single parameter is passed to the PREEXEC_SCRIPT.  Aborts the script
# if either hook fails.
run_preexec_check ()
{
        local mode=$1

        if [ -n "$PREEXEC_CHECK" ]; then
                $PREEXEC_CHECK || {
                        echo "Pre-exec check \"$PREEXEC_CHECK\" failed.  Aborting."
                        exit 1
                }
        fi

        if [ -n "$PREEXEC_SCRIPT" ]; then
                "$PREEXEC_SCRIPT" "$mode" || {
                        echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed.  Aborting."
                        exit 1
                }
        fi
}
153
# Usage: run_postexec_check [ start | restart | condrestart ]
# The single parameter will be passed to the POSTEXEC_SCRIPT
# (the comment previously said PREEXEC_SCRIPT — copy/paste error).
# Aborts the script if either post-exec hook fails.
run_postexec_check ()
{
        # Optional command that must succeed for the action to be considered ok.
        if [ -n "$POSTEXEC_CHECK" ] && ! $POSTEXEC_CHECK ; then
                echo "Post-exec check \"$POSTEXEC_CHECK\" failed.  Aborting."
                exit 1
        fi

        # Optional script invoked with the service action as its argument.
        if [ -n "$POSTEXEC_SCRIPT" ] && ! "$POSTEXEC_SCRIPT" "$1" ; then
                echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed.  Aborting."
                exit 1
        fi
}
168
# Usage: adjust_scsi_timeout <dev>
# When SCSI_DEVICE_TIMEOUT is configured, write it to the device's sysfs
# timeout file.  Devices not driven by sd (SCSI disk) are ignored.
# Returns 1 only when the sysfs write itself fails.
adjust_scsi_timeout ()
{
        local dev=$1
        local name sysfile driver

        [ -n "$SCSI_DEVICE_TIMEOUT" ] || return 0

        # make sure that it is actually a SCSI (sd) device
        name=`basename $dev`
        sysfile=/sys/block/${name}/device/timeout
        driver=`readlink /sys/block/${name}/device/driver`
        if [ -n "$driver" ] && [ "`basename $driver`" == "sd" ]; then
                if ! echo $SCSI_DEVICE_TIMEOUT >$sysfile; then
                        echo "FAILED: could not adjust ${dev} timeout"
                        return 1
                fi
        fi
        return 0
}
188
# Usage: fsck_test <dev> [ <dev> ... ]
# Checks all devices in parallel if FSCK_ARGS is set.
# Returns 0 when fsck is skipped or exits 0/1 (1 = errors corrected),
# 1 on any other fsck status or when no usable fsck binary is present.
fsck_test ()
{
        local devices="$*"
        local rc

        # Filter out non-absolute paths, which are probably ZFS datasets
        devices=`echo $devices |xargs -n 1|grep '^/'|xargs`

        if [ -n "${FSCK_ARGS}" -a -n "$devices" ]; then
                # Quote and test -n explicitly: the old unquoted
                # '[ -x $PFSCK ]' evaluated true when PFSCK was empty.
                if [ -n "$PFSCK" ] && [ -x "$PFSCK" ] ; then
                        echo "$PFSCK $devices -- ${FSCK_ARGS}"
                        $PFSCK $devices -- ${FSCK_ARGS}
                        rc=$?
                        # fsck status 1 only means errors were corrected.
                        # Capture rc once: the old code's error message
                        # printed the status of the '[' test, not fsck's.
                        if [ $rc -ne 0 -a $rc -ne 1 ] ; then
                                echo "FAILED: $PFSCK -- ${FSCK_ARGS}: $rc"
                                return 1
                        fi
                else
                        echo "$PFSCK not found"
                        return 1
                fi
        fi
        return 0
}
213
# Usage: test_feature_flag <dev> <flag>
# Returns 0 if tune2fs lists <flag> among the filesystem features of
# <dev>, 1 otherwise (including when tune2fs fails).
test_feature_flag()
{
        local dev=$1
        local flag=$2
        local feature

        # parse the "Filesystem features:" line into individual words
        for feature in `$TUNE2FS -l $dev 2>/dev/null \
                                | grep features: | sed -e 's/^.*: //'`; do
                [ "$feature" == "$flag" ] && return 0
        done

        return 1
}
232
# Usage: mmp_test <dev>
# Returns 0 if it is set or not required, 1 if unset and required or error.
mmp_test ()
{
        local dev=$1

        # MMP is only enforced when REQUIRE_MMP_FEATURE=yes (defaulted on
        # when foreign/failover servers are configured).
        [ "$REQUIRE_MMP_FEATURE" == "yes" ] || return 0

        if [ ! -x $TUNE2FS ]; then
                echo "$TUNE2FS not found"
                return 1
        fi
        if ! test_feature_flag $dev "mmp"; then
                echo "mmp feature flag is not set on $dev"
                return 1
        fi
        return 0
}
254
# Usage: label_to_mountpt <label>
# Prints mount point path, if label matches a local or foreign server.
# Prints nothing for unknown labels; local servers take precedence.
label_to_mountpt ()
{
        local label=$1
        local serv

        for serv in $LOCAL_SRV; do
                [ "$serv" == "$label" ] || continue
                echo "$LOCAL_MOUNT_DIR/$label"
                return
        done
        for serv in $FOREIGN_SRV; do
                [ "$serv" == "$label" ] || continue
                echo "$FOREIGN_MOUNT_DIR/$label"
                return
        done
}
275
# Usage: label_to_device <label>
# Prints canonical device path.
# Prefers the udev-maintained /dev/disk/by-label symlink; falls back to
# the device recorded in ldev.conf.
label_to_device ()
{
        local label=$1
        local link=/dev/disk/by-label/$label

        if [ -h $link ] ; then
                readlink --canonicalize $link
        else
                $LDEV -d $label
        fi
}
289
290 # helper for mountpt_is_active() and device_is_active()
291 declare -r awkprog='BEGIN {rc = 1;}
292                         { if ($field == path) {rc = 0;} }
293                     END { exit rc;}'
294
# Usage: mountpt_is_active <label>
# Returns 0 when the label's mount point appears in /proc/mounts,
# 1 (inactive) on invalid label or when not mounted.
mountpt_is_active ()
{
        local dir=`label_to_mountpt $1`

        [ -n "$dir" ] || return 1
        # field 2 of /proc/mounts is the mount point
        awk "$awkprog" field=2 path=$dir < /proc/mounts
}
308
# Usage: device_is_active <label>
# Returns 0 when the label's backing device appears as the source device
# (field 1) in /proc/mounts; 1 (inactive) on invalid label or not mounted.
device_is_active ()
{
        local dev=`label_to_device $1`
        local result=1

        if [ -n "$dev" ]; then
                # BUGFIX: compare against $dev; the old code passed the
                # never-set $dir here, so the match could never succeed.
                cat /proc/mounts | awk "$awkprog" field=1 path=$dev
                result=$?
        fi
        return $result
}
322
# Usage: mount_one_device <label> <successflag> [devtype]
# Mount a single lustre target on its mount point.  On any failure the
# <successflag> file is removed so the parallel caller can detect that
# at least one mount went wrong (trick to detect errors after parallel runs).
mount_one_device ()
{
        local label=$1
        local successflag=$2
        local devtype=$3
        local dev=`label_to_device $label`
        local dir=`label_to_mountpt $label`

        # $dir and $dev have already been checked at this point
        if [ ! -d $dir ] && ! mkdir -p $dir; then
                rm -f $successflag
                return
        fi
        echo "Mounting $dev on $dir"
        mount -t lustre $MOUNT_OPTIONS $dev $dir || rm -f $successflag
}
344
# Usage: assemble_md_device <device> [<raidtab>]
# Assemble the md device backing device unless it is already active,
# then retrigger udev so block-device symlinks are refreshed.
# Return 0 if the array is assembled successfully or was already active,
# otherwise return error code from mdadm.
assemble_md_device ()
{
        local dev=$1
        local raidtab=$2
        local result=0
        local args="-Aq"

        [ -n "$raidtab" ] && args="$args -c $raidtab"

        if ! md_array_is_active $dev ; then
                mdadm $args $dev
                result=$?
        fi

        udev_trigger
        return $result
}
368
# Usage: stop_md_device <device> [<raidtab>]
# Stop the md device backing device if it exists and is active.
# Return 0 if the array is stopped successfully or was not active,
# otherwise return error code from mdadm.
stop_md_device ()
{
        local dev=$1
        local raidtab=$2
        local result=0
        local args="-Sq"

        [ -n "$raidtab" ] && args="$args -c $raidtab"

        if [ -e $dev ] && md_array_is_active $dev ; then
                mdadm $args $dev
                result=$?
        fi

        return $result
}
391
# Usage: md_array_is_active <device>
# return 0 if device is an active md RAID array, or 1 otherwise
md_array_is_active ()
{
        local device=$1
        local rc

        [ -e "$device" ] || return 1

        mdadm --detail -t $device > /dev/null 2>&1
        rc=$?
        # mdadm exits 4 when the device is not an md array at all;
        # every other status is treated as "active"
        [ $rc -eq 4 ] && return 1
        return 0
}
406
# Usage: start_services <label> [ <label> ... ]
# fsck and mount any devices listed as arguments (in parallel).
# Attempt to assemble software raid arrays or zfs pools backing
# Lustre devices.
# Returns 0 on success, 2 if any label failed validation, assembly,
# fsck or mounting.
start_services ()
{
        local result=0
        local devices=""
        local dir dev label
        local successflag
        local labels

        start_zfs_services
        # Pass 1: validate each label and bring up its backing storage.
        for label in $*; do
                dir=`label_to_mountpt $label`
                devtype=`$LDEV -t $label`
                dev=`label_to_device $label`
                journal=`$LDEV -j $label`
                raidtab=`$LDEV -r $label`

                if [ -z "$dir" ] || [ -z "$dev" ]; then
                        echo "$label is not a valid lustre label on this node"
                        result=2
                        continue
                fi

                # assemble the backing device: md array or zfs pool
                if [ "$devtype" = "md" ] ; then
                        if ! assemble_md_device $dev $raidtab ; then
                                echo "failed to assemble array $dev backing $label"
                                result=2
                                continue
                        fi
                elif [ "$devtype" = "zfs" ] ; then
                        if ! import_zpool $label ; then
                                result=2
                        fi
                fi

                # Journal device field in ldev.conf may be "-" or empty,
                # so only attempt to assemble if its an absolute path.
                # Ignore errors since the journal device may not be an
                # md device.
                if echo $journal | grep -q ^/ ; then
                        assemble_md_device $journal $raidtab 2>/dev/null
                fi

                # The mmp/scsi-timeout checks only apply to non-zfs
                # (ldiskfs) targets.
                if [ "x$devtype" != "xzfs" ] ; then
                        if mountpt_is_active $label || \
                           device_is_active $label; then
                                echo "$label is already mounted"
                                # no error
                                continue
                        fi
                        if ! mmp_test $dev; then
                                result=2
                                continue
                        fi
                        if ! adjust_scsi_timeout $dev; then
                                result=2
                                continue
                        fi
                fi
                devices="$devices $dev"
                labels="$labels $label"
        done
        # Pass 2: fsck all surviving devices at once, then mount them in
        # parallel.  Skipped entirely if pass 1 reported any error.
        if [ $result == 0 ]; then
                fsck_test $devices || return 2

                # Fork to handle multiple mount_one_device()'s in parallel.
                # Errors occurred if $successflag comes up missing afterwards.
                successflag=`mktemp`
                [ -e $successflag ] || return 2
                for label in $labels; do
                        mount_one_device $label $successflag `$LDEV -t $label` &
                        # stagger to avoid module loading races
                        if [[ -n $MOUNT_DELAY && $MOUNT_DELAY -gt 0 ]] ; then
                                sleep $MOUNT_DELAY
                        fi
                done
                # NOTE(review): the first bare 'wait' already reaps every
                # child, so the loop is redundant but harmless.
                for label in $labels; do
                        wait
                done
                [ -e $successflag ] || return 2
                rm -f $successflag
        fi

        return $result
}
495
# Usage: stop_services <label> [ <label> ... ]
# Unmount any devices listed as arguments (serially).
# Any devices which are not mounted or don't exist are skipped with no error.
# Returns 0 on success, 2 if anything failed to unmount or is still active.
stop_services ()
{
        local labels=$*
        local result=0
        local dir dev label

        for label in $labels; do
                dir=`label_to_mountpt $label`
                if [ -z "$dir" ]; then
                        echo "$label is not a valid lustre label on this node"
                        result=2
                        continue
                fi
                # not mounted: skip silently, this is not an error
                mountpt_is_active $label || continue
                echo "Unmounting $dir"
                umount $dir || result=2
        done

        # double check! anything still mounted or with an active backing
        # device counts as a failure
        for label in $labels; do
                if mountpt_is_active $label; then
                        dir=`label_to_mountpt $label`
                        echo "Mount point $dir is still active"
                        result=2
                fi
                if device_is_active $label; then
                        dev=`label_to_device $label`
                        echo "Device $dev is still active"
                        result=2
                fi
        done
        stop_devices $labels

        return $result
}
537
# Usage: start_lustre_services [local|foreign|all|<label>]
# If no parameter is specified, local devices will be started.
start_lustre_services ()
{
        local labels

        case "$1" in
        ""|local)       labels=$LOCAL_SRV ;;
        foreign)        labels=$FOREIGN_SRV ;;
        all)            labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        *)              labels="$1" ;;
        esac
        # for use by heartbeat V1 resource agent:
        # starting an already-started service must not be an error
        start_services $labels || exit 2
}
560
# Usage: stop_lustre_services [local|foreign|all|<label>]
# If no parameter is specified all devices will be stopped.
stop_lustre_services ()
{
        local labels

        case "$1" in
        local)          labels=$LOCAL_SRV ;;
        foreign)        labels=$FOREIGN_SRV ;;
        ""|all)         labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        *)              labels="$1" ;;
        esac
        # for use by heartbeat V1 resource agent:
        # stopping already-stopped service must not be an error
        stop_services $labels || exit 2
}
582
# General lustre health check - not device specific.
# Prints a one-word state on stdout and returns:
#   0   - running (also used for "disconnected" and "recovery" states)
#   1   - unhealthy (health_check file reports NOT HEALTHY)
#   3   - stopped (LSB: service not running)
#   150 - partial startup
#   152 - LBUG detected
# Sets the globals STATE and RETVAL as a side effect.
health_check ()
{
        # temporarily turn nullglob off so unmatched /proc globs stay
        # literal; the caller's setting is restored before returning
        old_nullglob="`shopt -p nullglob`"
        shopt -u nullglob

        STATE="stopped"
        # LSB compliance - return 3 if service is not running
        # Lustre-specific returns
        # 150 - partial startup
        # 151 - health_check unhealthy
        # 152 - LBUG
        # NOTE(review): despite the 151 comment above, the unhealthy
        # branch below actually returns 1 — confirm which is intended.
        RETVAL=3
        # any core lustre kernel module loaded?
        egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"

        # check for any configured devices (may indicate partial startup)
        if [ -d /proc/fs/lustre ]; then
                if [ -n "`cat /proc/fs/lustre/devices 2> /dev/null`" ] ; then
                        STATE="partial"
                        RETVAL=150
                fi

                # check for either a server or a client filesystem
                MDT="`ls /proc/fs/lustre/mdt/*/recovery_status 2> /dev/null`"
                OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status \
                        2> /dev/null`"
                LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
                if [ "$MDT" -o "$OST" -o "$LLITE" ]; then
                        STATE="running"
                        RETVAL=0
                fi
        else
                # check if this is a router
                if [ -d /proc/sys/lnet ]; then
                        ROUTER="`cat /proc/sys/lnet/routes | head -1 |
                                grep -i -c \"Routing enabled\"`"
                        if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
                                STATE="running"
                                RETVAL=0
                        fi
                fi
        fi

        # check for server disconnections
        DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
        if [ -n "$DISCON" ] ; then
                STATE="disconnected"
                RETVAL=0
        fi

        # check for servers in recovery
        if [ -n "$MDT$OST" ] && grep -q RECOV $MDT $OST ; then
                STATE="recovery"
                RETVAL=0
        fi

        # check for error in health_check
        HEALTH="/proc/fs/lustre/health_check"
        if [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH ; then
                STATE="unhealthy"
                RETVAL=1
        fi

        # check for LBUG
        if [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH ; then
                STATE="LBUG"
                RETVAL=152
        fi

        echo $STATE
        # restore the caller's nullglob setting
        eval $old_nullglob
        return $RETVAL
}
656
# Usage: status [local|foreign|all|<label>]
# If no parameter is specified, general lustre health status will be reported.
# Exits 0 when anything is running, 3 otherwise (LSB "not running").
status ()
{
        local labels label dev
        local valid_devs=0

        case "$1" in
        local)          labels=$LOCAL_SRV ;;
        foreign)        labels=$FOREIGN_SRV ;;
        all)            labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        "")
                # ASSUMPTION: this is not the heartbeat res agent
                health_check
                exit $?
                ;;
        *)              labels=$1 ;;
        esac
        # for use by heartbeat V1 resource agent:
        # print "running" if *anything* is running.
        for label in $labels; do
                dev=`label_to_device $label`
                if [ -z "$dev" ]; then
                        echo "$label is not a valid lustre label on this node"
                        # no error
                        continue
                fi
                valid_devs=1
                if mountpt_is_active $label || device_is_active $label; then
                        echo "running"
                        exit 0
                fi
        done
        [ $valid_devs == 1 ] && echo "stopped"
        exit 3
}
698
# Print command-line usage on stdout and exit with status 1.
usage ()
{
        printf '%s\n' \
                "Usage: lustre {start|stop|status|restart|reload|condrestart}" \
                "" \
                "       lustre start  [local|foreign|<label>]" \
                "       lustre stop   [local|foreign|<label>]" \
                "       lustre status [local|foreign|<label>]"
        exit 1
}
710
# See how we were called.
# Dispatch on the service action ($1); the optional $2 selects
# local|foreign|all or a single label (see usage()).
case "$1" in
  start)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        run_preexec_check "start"
        start_lustre_services $2
        run_postexec_check "start"
        ;;
  stop)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        run_preexec_check "stop"
        stop_lustre_services $2
        run_postexec_check "stop"
        ;;
  status)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        status $2
        ;;
  restart)
        # full stop/start cycle by re-invoking this script
        $0 stop
        $0 start
        ;;
  reload)
        # nothing to reload
        ;;
  probe)
        # chkconfig "probe: true" hook - nothing to do
        ;;
  condrestart)
        # restart only if a lustre filesystem is currently mounted
        if grep lustre /proc/mounts ; then
                $0 stop
                $0 start
        fi
        ;;
  *)
        usage
esac

exit 0