Whamcloud - gitweb
LU-1866 osd: ancillary work for initial OI scrub
[fs/lustre-release.git] / lustre / scripts / lustre
#!/bin/bash
#
# lustre        This shell script takes care of starting and stopping
#              the lustre services.
#
# chkconfig: - 60 20
# description:  Part of the lustre file system.
# probe: true
# config: /etc/sysconfig/lustre

PATH=/sbin:/usr/sbin:/bin:/usr/bin

# Source function library.
. /etc/rc.d/init.d/functions

# Source networking configuration.
if [ ! -f /etc/sysconfig/network ]; then
        exit 0
fi

. /etc/sysconfig/network

# Tool locations; all may be overridden from the environment or sysconfig.
LDEV=${LDEV:-"/usr/sbin/ldev"}
ZPOOL_LAYOUT=/usr/bin/zpool_layout
UDEVADM=${UDEVADM:-/sbin/udevadm}

# Check that networking is up.
[ "${NETWORKING}" = "no" ] && exit 0

# Check for and source configuration file otherwise set defaults
[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
FSCK_ARGS=${FSCK_ARGS:-""}
MOUNT_OPTIONS=${MOUNT_OPTIONS:-""}
# Local/foreign (failover) server labels come from ldev.conf via ldev(8).
LOCAL_SRV=${LOCAL_SRV:-"$($LDEV -l 2>/dev/null)"}
FOREIGN_SRV=${FOREIGN_SRV:-"$($LDEV -f 2>/dev/null)"}
# Require the MMP feature by default whenever failover servers exist.
REQUIRE_MMP_FEATURE=${REQUIRE_MMP_FEATURE:-${FOREIGN_SRV:+"yes"}}
LOCAL_MOUNT_DIR=${LOCAL_MOUNT_DIR:-"/mnt/lustre/local"}
FOREIGN_MOUNT_DIR=${FOREIGN_MOUNT_DIR:-"/mnt/lustre/foreign"}
SETUP_DEVICES=${SETUP_DEVICES:-""}
ZPOOL_LAYOUT_BUSES=${ZPOOL_LAYOUT_BUSES:-""}
ZPOOL_LAYOUT_PORTS=${ZPOOL_LAYOUT_PORTS:-""}
ZPOOL_LAYOUT_MAP=${ZPOOL_LAYOUT_MAP:-""}
MOUNT_DELAY=${MOUNT_DELAY:-2}
LOAD_ZFS=${LOAD_ZFS:-""}

# Prefer the ldiskfs-branded tools; fall back to the stock e2fsprogs names.
# (command -v replaces the deprecated external `which`.)
if [ -z "$TUNE2FS" ] ; then
        TUNE2FS=$(command -v tunefs.ldiskfs 2>/dev/null)
        if [ -z "$TUNE2FS" ] ; then
                TUNE2FS=$(command -v tune2fs 2>/dev/null)
        fi
fi

if [ -z "$PFSCK" ] ; then
        PFSCK=$(command -v pfsck.ldiskfs 2>/dev/null)
        if [ -z "$PFSCK" ] ; then
                PFSCK=$(command -v fsck 2>/dev/null)
        fi
fi

# Unmatched globs expand to nothing (used by the /proc/fs/lustre scans).
shopt -s nullglob
61
start_zfs_services ()
{
        # Build device aliases from the configured bus/port layout, if any.
        if [[ -n "$ZPOOL_LAYOUT_BUSES" && -n "$ZPOOL_LAYOUT_PORTS" ]]; then
                MAP_ARG=${ZPOOL_LAYOUT_MAP:+"-m $ZPOOL_LAYOUT_MAP"}
                $ZPOOL_LAYOUT -t -b "$ZPOOL_LAYOUT_BUSES" \
                        -p "$ZPOOL_LAYOUT_PORTS" $MAP_ARG
        fi
        # Optionally preload the zfs kernel module; failure is fatal.
        if [[ "$LOAD_ZFS" == "yes" ]] && ! modprobe zfs ; then
                echo "Failed to load zfs module.  Aborting."
                exit 1
        fi
}
74
# Usage: stop_devices <label> [ <label> ... ]
# Shut down the backing storage (zpool or md array) for each label.
stop_devices ()
{
        local labels=$*
        # dev and journal were previously undeclared and leaked as globals.
        local label devtype dev journal

        for label in $labels; do
                devtype=$($LDEV -t $label)
                if [ "$devtype" = "zfs" ] ; then
                        export_zpool $label
                elif [ "$devtype" = "md" ] ; then
                        # Stop both the data array and its external journal.
                        dev=$(label_to_device $label)
                        journal=$($LDEV -j $label)
                        stop_md_device $dev
                        stop_md_device $journal
                fi
        done
}
91
# Usage: import_zpool <label>
# Import the zpool backing <label>; an already-imported pool is a success.
import_zpool ()
{
        local label=$1
        local result=1
        local pool=`$LDEV -z $label`
        local cache=`$LDEV -r $label`
        local args="-N $ZPOOL_IMPORT_ARGS"

        # A cachefile (-c) cannot be combined with a search dir (-d).
        if [ -n "$cache" ]; then
                args="$args -c $cache"
        elif [ -n "$ZPOOL_IMPORT_DIR" ]; then
                args="$args -d $ZPOOL_IMPORT_DIR"
        fi

        if zpool status $pool >/dev/null 2>&1; then
                result=0
        elif [ -n "$pool" ]; then
                zpool import $pool $args 2>/dev/null
                result=$?
        fi

        if [ $result -ne 0 ]; then
                echo "Unexpected return code from import of pool $pool: $result"
        fi
        return $result
}
117
# Usage: export_zpool <label>
# Export the zpool backing <label>; errors are deliberately ignored.
export_zpool ()
{
        local label=$1
        local pool

        pool=`$LDEV -z $label`
        zpool export $pool 2>/dev/null
}
124
# Trigger udev change events for block devices and wait for the queue
# to drain, so device symlinks exist before we try to use them.
udev_trigger()
{
        # Fall back to the legacy helpers when udevadm is unavailable.
        if [ ! -x ${UDEVADM} ]; then
                /sbin/udevtrigger
                /sbin/udevsettle
                return
        fi
        ${UDEVADM} trigger --action=change --subsystem-match=block
        ${UDEVADM} settle
}
136
# Usage: run_preexec_check [ start | restart | condrestart ]
# The single parameter will be passed to the PREEXEC_SCRIPT
run_preexec_check ()
{
        local mode=$1

        # Optional check command: any non-zero status aborts the script.
        if [ -n "$PREEXEC_CHECK" ]; then
                $PREEXEC_CHECK || {
                        echo "Pre-exec check \"$PREEXEC_CHECK\" failed.  Aborting."
                        exit 1
                }
        fi

        # Optional script: receives the action name as its only argument.
        if [ -n "$PREEXEC_SCRIPT" ]; then
                "$PREEXEC_SCRIPT" "$mode" || {
                        echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed.  Aborting."
                        exit 1
                }
        fi
}
151
# Usage: run_postexec_check [ start | restart | condrestart ]
# The single parameter will be passed to the POSTEXEC_SCRIPT
run_postexec_check ()
{
        # Optional check command from sysconfig; non-zero status aborts.
        if [ -n "$POSTEXEC_CHECK" ] && ! $POSTEXEC_CHECK ; then
                echo "Post-exec check \"$POSTEXEC_CHECK\" failed.  Aborting."
                exit 1
        fi

        # Optional script, invoked with the action name as its argument.
        if [ -n "$POSTEXEC_SCRIPT" ] && ! "$POSTEXEC_SCRIPT" "$1" ; then
                echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed.  Aborting."
                exit 1
        fi
}
166
# Usage: adjust_scsi_timeout <dev>
# Apply $SCSI_DEVICE_TIMEOUT to the device's sysfs timeout knob.
# Returns 0 when no timeout is configured or the device is not an sd disk.
adjust_scsi_timeout ()
{
        local dev=$1
        local name proc driver

        # Nothing to do unless a timeout override was configured.
        [ -n "$SCSI_DEVICE_TIMEOUT" ] || return 0

        name=`basename $dev`
        proc=/sys/block/${name}/device/timeout
        driver=`readlink /sys/block/${name}/device/driver`

        # Only devices bound to the "sd" driver carry this timeout file.
        if [ -n "$driver" ] && [ "`basename $driver`" == "sd" ]; then
                if ! echo $SCSI_DEVICE_TIMEOUT >$proc; then
                        echo "FAILED: could not adjust ${dev} timeout"
                        return 1
                fi
        fi
        return 0
}
186
# Usage: fsck_test <dev> [ <dev> ... ]
# Checks all devices in parallel if FSCK_ARGS is set.
# Returns 0 on success (fsck status 0 or 1 — 1 means errors were fixed),
# 1 on any other fsck status or if the fsck tool is missing.
fsck_test ()
{
        local devices="$*"
        local rc

        # Filter out non-absolute paths, which are probably ZFS datasets
        devices=`echo $devices |xargs -n 1|grep '^/'|xargs`

        if [ -n "${FSCK_ARGS}" -a -n "$devices" ]; then
                if [ -x $PFSCK ] ; then
                        echo "$PFSCK $devices -- ${FSCK_ARGS}"
                        $PFSCK $devices -- ${FSCK_ARGS}
                        # Capture pfsck's status once; the original re-read $?
                        # in the error message after it had been clobbered by
                        # the [ ] test, printing the wrong code.
                        rc=$?
                        if [ $rc -ne 0 -a $rc -ne 1 ] ; then
                                echo "FAILED: $PFSCK -- ${FSCK_ARGS}: $rc"
                                return 1
                        fi
                else
                        echo "$PFSCK not found"
                        return 1
                fi
        fi
        return 0
}
211
# Usage: test_feature_flag <dev> <flag>
# Return 0 if <flag> appears in the tune2fs feature list of <dev>, else 1.
test_feature_flag()
{
        local dev=$1
        local flag=$2
        local feature

        # Walk the whitespace-separated feature list from tune2fs -l.
        for feature in `$TUNE2FS -l $dev 2>/dev/null \
                                | grep features: | sed -e 's/^.*: //'`; do
                [ "$feature" == "$flag" ] && return 0
        done

        return 1
}
230
# Usage: mmp_test <dev>
# Returns 0 if it is set or not required, 1 if unset and required or error.
mmp_test ()
{
        local dev=$1

        # MMP is only enforced when REQUIRE_MMP_FEATURE says so.
        [ "$REQUIRE_MMP_FEATURE" == "yes" ] || return 0

        if [ ! -x $TUNE2FS ]; then
                echo "$TUNE2FS not found"
                return 1
        fi

        if ! test_feature_flag $dev "mmp"; then
                echo "mmp feature flag is not set on $dev"
                return 1
        fi

        return 0
}
252
# Usage: label_to_mountpt <label>
# Prints mount point path, if label matches a local or foreign server.
# Prints nothing for an unknown label.
label_to_mountpt ()
{
        local label=$1
        local srv

        for srv in $LOCAL_SRV; do
                [ "$srv" == "$label" ] || continue
                echo "$LOCAL_MOUNT_DIR/$label"
                return
        done
        for srv in $FOREIGN_SRV; do
                [ "$srv" == "$label" ] || continue
                echo "$FOREIGN_MOUNT_DIR/$label"
                return
        done
}
273
# Usage: label_to_device <label>
# Prints canonical device path.
label_to_device ()
{
        local label=$1
        local path=/dev/disk/by-label/$label

        # Fall back to ldev.conf when no by-label symlink exists.
        if [ ! -h $path ] ; then
                $LDEV -d $label
        else
                readlink --canonicalize $path
        fi
}
287
288 # helper for mountpt_is_active() and device_is_active()
289 declare -r awkprog='BEGIN {rc = 1;}
290                         { if ($field == path) {rc = 0;} }
291                     END { exit rc;}'
292
# Usage: mountpt_is_active <label>
# Return 0 if the label's mount point appears in /proc/mounts,
# 1 (inactive) on invalid label or when not mounted.
mountpt_is_active ()
{
        local dir=`label_to_mountpt $1`
        local result=1

        # Match field 2 (mount point) of /proc/mounts against $dir.
        # (awk reads the file directly; the former "cat |" was redundant.)
        if [ -n "$dir" ]; then
                awk "$awkprog" field=2 path=$dir /proc/mounts
                result=$?
        fi
        return $result
}
306
# Usage: device_is_active <label>
# Return 0 if the label's device appears in /proc/mounts,
# 1 (inactive) on invalid label or when not mounted.
device_is_active ()
{
        local dev=`label_to_device $1`
        local result=1

        # Match field 1 (device) of /proc/mounts against $dev.
        # Bug fix: the original passed path=$dir, but $dir is never set in
        # this function, so an active device could never be detected.
        if [ -n "$dev" ]; then
                awk "$awkprog" field=1 path=$dev /proc/mounts
                result=$?
        fi
        return $result
}
320
# Usage: mount_one_device <label> <successflag> [devtype]
# Remove <successflag> on error (trick to detect errors after parallel runs).
mount_one_device ()
{
        local label=$1
        local successflag=$2
        local devtype=$3
        local dev=`label_to_device $label`
        local dir=`label_to_mountpt $label`

        # $dir and $dev have already been validated by the caller.
        if [ ! -d $dir ] && ! mkdir -p $dir; then
                rm -f $successflag
                return
        fi

        echo "Mounting $dev on $dir"
        # On mount failure, drop the flag file so the caller sees the error.
        mount -t lustre $MOUNT_OPTIONS $dev $dir || rm -f $successflag
}
342
# Usage: assemble_md_device <device> [raidtab]
# Assemble the md device backing device.
# Return 0 if the array is assembled successfully or was already active,
# otherwise return error code from mdadm.
assemble_md_device ()
{
        local dev=$1
        local raidtab=$2
        local result=0
        local args="-Aq"

        [ -n "$raidtab" ] && args="$args -c $raidtab"

        if ! md_array_is_active $dev ; then
                mdadm $args $dev
                result=$?
        fi

        # Re-run udev rules so symlinks for the new array appear.
        udev_trigger
        return $result
}
366
# Usage: stop_md_device <device> [raidtab]
# Stop the md device backing device.
# Return 0 if the array is stopped successfully or was not active,
# otherwise return error code from mdadm.
stop_md_device ()
{
        local dev=$1
        local raidtab=$2
        local result=0
        local args="-Sq"

        [ -n "$raidtab" ] && args="$args -c $raidtab"

        # Only stop arrays that exist and are currently active.
        if [ -e $dev ] && md_array_is_active $dev ; then
                mdadm $args $dev
                result=$?
        fi

        return $result
}
389
# Usage: md_array_is_active <device>
# return 0 if device is an active md RAID array, or 1 otherwise
md_array_is_active ()
{
        local device=$1

        [ -e "$device" ] || return 1

        # Only mdadm's exit status 4 is treated as "not active" here;
        # every other status counts as active.
        mdadm --detail -t $device > /dev/null 2>&1
        [ $? -eq 4 ] && return 1
        return 0
}
404
# Usage: start_services <label> [ <label> ... ]
# fsck and mount any devices listed as arguments (in parallel).
# Attempt to assemble software raid arrays or zfs pools backing
# Lustre devices.
start_services ()
{
        local result=0
        local devices=""
        local dir dev label
        local successflag
        local labels

        # NOTE(review): devtype, journal and raidtab below are not declared
        # local and leak into the global scope — consider adding them.
        start_zfs_services
        # First pass: validate each label and bring up its backing storage.
        for label in $*; do
                dir=`label_to_mountpt $label`
                devtype=`$LDEV -t $label`
                dev=`label_to_device $label`
                journal=`$LDEV -j $label`
                raidtab=`$LDEV -r $label`

                if [ -z "$dir" ] || [ -z "$dev" ]; then
                        echo "$label is not a valid lustre label on this node"
                        result=2
                        continue
                fi

                # Assemble the md array or import the zpool backing this label.
                if [ "$devtype" = "md" ] ; then
                        if ! assemble_md_device $dev $raidtab ; then
                                echo "failed to assemble array $dev backing $label"
                                result=2
                                continue
                        fi
                elif [ "$devtype" = "zfs" ] ; then
                        if ! import_zpool $label ; then
                                result=2
                        fi
                fi

                # Journal device field in ldev.conf may be "-" or empty,
                # so only attempt to assemble if its an absolute path.
                # Ignore errors since the journal device may not be an
                # md device.
                if echo $journal | grep -q ^/ ; then
                        assemble_md_device $journal $raidtab 2>/dev/null
                fi

                # Non-zfs (ldiskfs) only: skip already-mounted targets and
                # verify the MMP flag and SCSI timeout before mounting.
                if [ "x$devtype" != "xzfs" ] ; then
                        if mountpt_is_active $label || \
                           device_is_active $label; then
                                echo "$label is already mounted"
                                # no error
                                continue
                        fi
                        if ! mmp_test $dev; then
                                result=2
                                continue
                        fi
                        if ! adjust_scsi_timeout $dev; then
                                result=2
                                continue
                        fi
                fi
                devices="$devices $dev"
                labels="$labels $label"
        done
        # Second pass: fsck all devices, then mount each one in the
        # background, staggered by MOUNT_DELAY seconds.
        if [ $result == 0 ]; then
                fsck_test $devices || return 2

                # Fork to handle multiple mount_one_device()'s in parallel.
                # Errors occurred if $successflag comes up missing afterwards.
                successflag=`mktemp`
                [ -e $successflag ] || return 2
                for label in $labels; do
                        mount_one_device $label $successflag `$LDEV -t $label` &
                        # stagger to avoid module loading races
                        if [[ -n $MOUNT_DELAY && $MOUNT_DELAY -gt 0 ]] ; then
                                sleep $MOUNT_DELAY
                        fi
                done
                # One wait per forked mount (wait with no args would also do).
                for label in $labels; do
                        wait
                done
                [ -e $successflag ] || return 2
                rm -f $successflag
        fi

        return $result
}
493
# Usage: stop_services <label> [ <label> ... ]
# Unmount any devices listed as arguments (serially).
# Any devices which are not mounted or don't exist are skipped with no error.
stop_services ()
{
        local labels=$*
        local result=0
        local dir dev label

        for label in $labels; do
                dir=`label_to_mountpt $label`
                if [ -z "$dir" ]; then
                        echo "$label is not a valid lustre label on this node"
                        result=2
                        continue
                fi
                # Quietly skip labels that are not mounted - not an error.
                mountpt_is_active $label || continue
                echo "Unmounting $dir"
                umount $dir || result=2
        done

        # double check that every mount point and device really went away
        for label in $labels; do
                if mountpt_is_active $label; then
                        dir=`label_to_mountpt $label`
                        echo "Mount point $dir is still active"
                        result=2
                fi
                if device_is_active $label; then
                        dev=`label_to_device $label`
                        echo "Device $dev is still active"
                        result=2
                fi
        done

        # Finally tear down the backing zpools / md arrays.
        stop_devices $labels

        return $result
}
535
# Usage: start_lustre_services [local|foreign|all|<label>]
# If no parameter is specified, local devices will be started.
start_lustre_services ()
{
        local labels

        # Translate the selector into a list of server labels.
        case "$1" in
        ""|local)  labels=$LOCAL_SRV ;;
        foreign)   labels=$FOREIGN_SRV ;;
        all)       labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        *)         labels="$1" ;;
        esac

        # for use by heartbeat V1 resource agent:
        # starting an already-started service must not be an error
        start_services $labels || exit 2
}
558
# Usage: stop_lustre_services [local|foreign|all|<label>]
# If no parameter is specified all devices will be stopped.
stop_lustre_services ()
{
        local labels

        # Translate the selector into a list of server labels;
        # unlike start, the empty selector means "all".
        case "$1" in
        local)     labels=$LOCAL_SRV ;;
        foreign)   labels=$FOREIGN_SRV ;;
        ""|all)    labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        *)         labels="$1" ;;
        esac

        # for use by heartbeat V1 resource agent:
        # stopping already-stopped service must not be an error
        stop_services $labels || exit 2
}
580
# General lustre health check - not device specific.
# Prints a one-word state on stdout and returns an LSB-style status code.
health_check ()
{
        # Save and disable nullglob (enabled at the top of this script) so
        # the glob patterns below behave conventionally; restored on exit.
        old_nullglob="`shopt -p nullglob`"
        shopt -u nullglob

        STATE="stopped"
        # LSB compliance - return 3 if service is not running
        # Lustre-specific returns
        # 150 - partial startup
        # 151 - health_check unhealthy
        # 152 - LBUG
        RETVAL=3
        # core lustre modules present -> at least "loaded"
        egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"

        # check for any configured devices (may indicate partial startup)
        if [ -d /proc/fs/lustre ]; then
                if [ -n "`cat /proc/fs/lustre/devices 2> /dev/null`" ] ; then
                        STATE="partial"
                        RETVAL=150
                fi

                # check for either a server or a client filesystem
                MDT="`ls /proc/fs/lustre/mdt/*/recovery_status 2> /dev/null`"
                OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status \
                        2> /dev/null`"
                LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
                if [ "$MDT" -o "$OST" -o "$LLITE" ]; then
                        STATE="running"
                        RETVAL=0
                fi
        else
                # check if this is a router
                if [ -d /proc/sys/lnet ]; then
                        ROUTER="`cat /proc/sys/lnet/routes | head -1 |
                                grep -i -c \"Routing enabled\"`"
                        if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
                                STATE="running"
                                RETVAL=0
                        fi
                fi
        fi

        # check for server disconnections
        DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
        if [ -n "$DISCON" ] ; then
                STATE="disconnected"
                RETVAL=0
        fi

        # check for servers in recovery
        if [ -n "$MDT$OST" ] && grep -q RECOV $MDT $OST ; then
                STATE="recovery"
                RETVAL=0
        fi

        # check for error in health_check
        HEALTH="/proc/fs/lustre/health_check"
        if [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH ; then
                STATE="unhealthy"
                RETVAL=1
        fi

        # check for LBUG
        if [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH ; then
                STATE="LBUG"
                RETVAL=152
        fi

        # report the state word; the exit status carries the LSB code
        echo $STATE
        # restore the caller's nullglob setting
        eval $old_nullglob
        return $RETVAL
}
654
# Usage: status [local|foreign|all|<label>]
# If no parameter is specified, general lustre health status will be reported.
# Exits 0 if anything is running, 3 otherwise.
status ()
{
        local labels=""
        local label dir
        local valid_devs=0

        case "$1" in
        local)   labels=$LOCAL_SRV ;;
        foreign) labels=$FOREIGN_SRV ;;
        all)     labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        "")      # ASSUMPTION: this is not the heartbeat res agent
                 health_check
                 exit $?
                 ;;
        *)       labels=$1 ;;
        esac

        # for use by heartbeat V1 resource agent:
        # print "running" if *anything* is running.
        for label in $labels; do
                dir=`label_to_device $label`
                if [ -z "$dir" ]; then
                        echo "$label is not a valid lustre label on this node"
                        # no error
                        continue
                fi
                valid_devs=1
                if mountpt_is_active $label || device_is_active $label; then
                        echo "running"
                        exit 0
                fi
        done
        [ $valid_devs == 1 ] && echo "stopped"
        exit 3
}
696
usage ()
{
        # Print the command synopsis and exit with an error status.
        cat <<EOF
Usage: lustre {start|stop|status|restart|reload|condrestart}

       lustre start  [local|foreign|<label>]
       lustre stop   [local|foreign|<label>]
       lustre status [local|foreign|<label>]
EOF
        exit 1
}
708
# See how we were called.
# Each action accepts at most one optional selector argument
# (local|foreign|all|<label>), validated as "$# <= 2".
case "$1" in
  start)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        run_preexec_check "start"
        start_lustre_services $2
        run_postexec_check "start"
        ;;
  stop)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        run_preexec_check "stop"
        stop_lustre_services $2
        run_postexec_check "stop"
        ;;
  status)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        status $2
        ;;
  restart)
        $0 stop
        $0 start
        ;;
  reload)
        # nothing to re-read; accepted for LSB compatibility
        ;;
  probe)
        ;;
  condrestart)
        # only restart when a lustre filesystem is currently mounted
        if grep lustre /proc/mounts ; then
                $0 stop
                $0 start
        fi
        ;;
  *)
        usage
esac

exit 0