# Whamcloud - gitweb
# LU-1943 osd: ldiskfs osd to set param.ddp_inodespace
# [fs/lustre-release.git] / lustre / scripts / lustre.in
1 #!/bin/bash
2 #
3 # lustre        This shell script takes care of starting and stopping
4 #              the lustre services.
5 #
6 # chkconfig: - 60 20
7 # description:  Part of the lustre file system.
8 # probe: true
9 # config: /etc/sysconfig/lustre
10
11 # Source function library.
12 . /etc/rc.d/init.d/functions
13
14 # Source networking configuration.
15 if [ ! -f /etc/sysconfig/network ]; then
16         exit 0
17 fi
18
19 . /etc/sysconfig/network
20
21 LDEV=${LDEV:-"/usr/sbin/ldev"}
22 ZPOOL_LAYOUT=/usr/bin/zpool_layout
23 UDEVADM=${UDEVADM:-/sbin/udevadm}
24
25 # Check that networking is up.
26 [ "${NETWORKING}" = "no" ] && exit 0
27
28 # Check for and source configuration file otherwise set defaults
29 [ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
30 FSCK_ARGS=${FSCK_ARGS:-""}
31 MOUNT_OPTIONS=${MOUNT_OPTIONS:-""}
32 LOCAL_SRV=${LOCAL_SRV:-"`$LDEV -l 2>/dev/null`"}
33 FOREIGN_SRV=${FOREIGN_SRV:-"`$LDEV -f 2>/dev/null`"}
34 REQUIRE_MMP_FEATURE=${REQUIRE_MMP_FEATURE:-${FOREIGN_SRV:+"yes"}}
35 LOCAL_MOUNT_DIR=${LOCAL_MOUNT_DIR:-"/mnt/lustre/local"}
36 FOREIGN_MOUNT_DIR=${FOREIGN_MOUNT_DIR:-"/mnt/lustre/foreign"}
37 SETUP_DEVICES=${SETUP_DEVICES:-""}
38 ZPOOL_LAYOUT_BUSES=${ZPOOL_LAYOUT_BUSES:-""}
39 ZPOOL_LAYOUT_PORTS=${ZPOOL_LAYOUT_PORTS:-""}
40 ZPOOL_LAYOUT_MAP=${ZPOOL_LAYOUT_MAP:-""}
41 MOUNT_DELAY=${MOUNT_DELAY:-2}
42 LOAD_ZFS=${LOAD_ZFS:-""}
43
44 shopt -s nullglob
45
# Prepare the node for ZFS-backed targets: optionally build the
# /dev/disk/by-vdev layout and optionally preload the zfs module.
# Exits the script if the zfs module was requested but fails to load.
start_zfs_services ()
{
        local map_arg

        # Lay out the by-vdev namespace when a bus/port mapping is configured.
        if [ -n "$ZPOOL_LAYOUT_BUSES" ] && [ -n "$ZPOOL_LAYOUT_PORTS" ]; then
                map_arg=${ZPOOL_LAYOUT_MAP:+"-m $ZPOOL_LAYOUT_MAP"}
                $ZPOOL_LAYOUT -t -b "$ZPOOL_LAYOUT_BUSES" \
                        -p "$ZPOOL_LAYOUT_PORTS" $map_arg
        fi

        # Loading zfs was explicitly requested, so failure is fatal.
        if [ "$LOAD_ZFS" = "yes" ]; then
                if ! modprobe zfs; then
                        echo "Failed to load zfs module.  Aborting."
                        exit 1
                fi
        fi
}
58
# Usage: stop_devices <label> [ <label> ... ]
# Tear down the backing storage for each label: export zfs pools,
# stop md arrays (and their external journal arrays).
# Fix: dev/journal are now local (they previously leaked into the caller,
# clobbering stop_services' locals via dynamic scoping); the unused
# "result" variable is removed.
stop_devices ()
{
        local labels=$*
        local label devtype dev journal

        for label in $labels; do
                devtype=`$LDEV -t $label`
                if [ "$devtype" = "zfs" ] ; then
                        export_zpool $label
                elif [ "$devtype" = "md" ] ; then
                        dev=`label_to_device $label`
                        journal=`$LDEV -j $label`
                        stop_md_device $dev
                        stop_md_device $journal
                fi
        done
}
76
# Usage: import_zpool <label>
# Ensure the zpool backing <label> is imported (without mounting, -N).
# Returns 0 when the pool is available, 1 when the label has no pool
# or the import failed.
import_zpool ()
{
        local label=$1
        local pool=`$LDEV -z $label`
        local cache=`$LDEV -r $label`
        local args="-N $ZPOOL_IMPORT_ARGS"

        # A cachefile (-c) may not be combined with a search dir (-d),
        # so pick exactly one source of device paths.
        if [ -n "$cache" ]; then
                args="$args -c $cache"
        elif [ -n "$ZPOOL_IMPORT_DIR" ]; then
                args="$args -d $ZPOOL_IMPORT_DIR"
        elif [ -d /dev/disk/by-vdev ]; then
                args="$args -d /dev/disk/by-vdev"
        elif [ -d /dev/mapper ]; then
                args="$args -d /dev/mapper"
        fi

        # Already imported: nothing more to do.
        if zpool status $pool >/dev/null 2>&1; then
                return 0
        fi

        # No pool configured for this label.
        [ -n "$pool" ] || return 1

        zpool import $pool $args 2>/dev/null
}
103
# Usage: export_zpool <label>
# Export the zpool backing <label>; zpool diagnostics are suppressed.
export_zpool ()
{
        local pool
        pool=`$LDEV -z $1`
        zpool export $pool 2>/dev/null
}
110
# Trigger udev and wait for it to settle.
udev_trigger()
{
        # Older distros ship standalone udevtrigger/udevsettle instead
        # of the consolidated udevadm binary.
        if [ ! -x ${UDEVADM} ]; then
                /sbin/udevtrigger
                /sbin/udevsettle
                return
        fi
        ${UDEVADM} trigger --action=change --subsystem-match=block
        ${UDEVADM} settle
}
122
# Usage: run_preexec_check [ start | restart | condrestart ]
# The single parameter will be passed to the PREEXEC_SCRIPT
# Runs the optional PREEXEC_CHECK command, then the optional
# PREEXEC_SCRIPT; aborts the whole script if either fails.
run_preexec_check ()
{
        local mode=$1

        if [ -n "$PREEXEC_CHECK" ]; then
                if ! $PREEXEC_CHECK; then
                        echo "Pre-exec check \"$PREEXEC_CHECK\" failed.  Aborting."
                        exit 1
                fi
        fi

        if [ -n "$PREEXEC_SCRIPT" ]; then
                if ! "$PREEXEC_SCRIPT" "$mode"; then
                        echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed.  Aborting."
                        exit 1
                fi
        fi
}
137
# Usage: run_postexec_check [ start | restart | condrestart ]
# The single parameter will be passed to the POSTEXEC_SCRIPT
# Runs the optional POSTEXEC_CHECK command, then the optional
# POSTEXEC_SCRIPT; aborts the whole script if either fails.
run_postexec_check ()
{
        local mode=$1

        if [ -n "$POSTEXEC_CHECK" ]; then
                if ! $POSTEXEC_CHECK; then
                        echo "Post-exec check \"$POSTEXEC_CHECK\" failed.  Aborting."
                        exit 1
                fi
        fi

        if [ -n "$POSTEXEC_SCRIPT" ]; then
                if ! "$POSTEXEC_SCRIPT" "$mode"; then
                        echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed.  Aborting."
                        exit 1
                fi
        fi
}
152
# Usage: adjust_scsi_timeout <dev>
# Apply $SCSI_DEVICE_TIMEOUT to <dev> via sysfs when it is an sd device.
# Returns 1 only when the sysfs write fails; otherwise 0.
adjust_scsi_timeout ()
{
        local dev=$1
        local name driver sysfile

        # No timeout override configured: nothing to do.
        [ -n "$SCSI_DEVICE_TIMEOUT" ] || return 0

        # Only genuine SCSI disk (sd) devices get their timeout adjusted.
        name=`basename $dev`
        sysfile=/sys/block/${name}/device/timeout
        driver=`readlink /sys/block/${name}/device/driver`
        if [ -n "$driver" ] && [ "`basename $driver`" == "sd" ]; then
                if ! echo $SCSI_DEVICE_TIMEOUT >$sysfile; then
                        echo "FAILED: could not adjust ${dev} timeout"
                        return 1
                fi
        fi
        return 0
}
172
# Usage: fsck_test <dev> [ <dev> ... ]
# Checks all devices in parallel if FSCK_ARGS is set.
# Returns 0 when there is nothing to check or the filesystems are clean
# (pfsck exit status 0 or 1), 1 on any other failure.
# Fix: the pfsck exit status is captured into rc before it is consumed;
# the original error message printed the status of the preceding "[" test
# (always 0) instead of the real pfsck status.
fsck_test ()
{
        local devices="$*"
        local rc

        # Filter out non-absolute paths, which are probably ZFS datasets
        devices=`echo $devices |xargs -n 1|grep '^/'|xargs`

        if [ -n "${FSCK_ARGS}" -a -n "$devices" ]; then
                if [ -x /sbin/@PFSCK@ ] ; then
                        echo "@PFSCK@ $devices -- ${FSCK_ARGS}"
                        /sbin/@PFSCK@ $devices -- ${FSCK_ARGS}
                        rc=$?
                        # 0 = clean, 1 = errors corrected; anything else failed.
                        if [ $rc -ne 0 -a $rc -ne 1 ] ; then
                                echo "FAILED: @PFSCK@ -- ${FSCK_ARGS}: $rc"
                                return 1
                        fi
                else
                        echo "/sbin/@PFSCK@ not found"
                        return 1
                fi
        fi
        return 0
}
197
# Usage: test_feature_flag <dev> <flag>
# Return 0 when the ext filesystem on <dev> advertises <flag> in its
# tune2fs feature list, 1 otherwise (including tune2fs errors).
test_feature_flag()
{
        local dev=$1
        local flag=$2
        local feature

        # Walk the whitespace-separated feature list reported by tune2fs.
        for feature in `/sbin/@TUNE2FS@ -l $dev 2>/dev/null \
                                | grep features: | sed -e 's/^.*: //'`; do
                if [ "$feature" == "$flag" ]; then
                        return 0
                fi
        done

        return 1
}
216
# Usage: mmp_test <dev>
# Returns 0 if it is set or not required, 1 if unset and required or error.
mmp_test ()
{
        local dev=$1

        # MMP is only enforced when explicitly required (failover setups).
        [ "$REQUIRE_MMP_FEATURE" == "yes" ] || return 0

        if [ ! -x /sbin/@TUNE2FS@ ]; then
                echo "/sbin/@TUNE2FS@ not found"
                return 1
        fi

        if ! test_feature_flag $dev "mmp"; then
                echo "mmp feature flag is not set on $dev"
                return 1
        fi

        return 0
}
238
# Usage: label_to_mountpt <label>
# Prints mount point path, if label matches a local or foreign server.
# Prints nothing for unknown labels.
label_to_mountpt ()
{
        local label=$1
        local serv

        # Local targets mount under LOCAL_MOUNT_DIR ...
        for serv in $LOCAL_SRV; do
                [ "$serv" != "$label" ] || { echo "$LOCAL_MOUNT_DIR/$label"; return 0; }
        done
        # ... failover-partner targets under FOREIGN_MOUNT_DIR.
        for serv in $FOREIGN_SRV; do
                [ "$serv" != "$label" ] || { echo "$FOREIGN_MOUNT_DIR/$label"; return 0; }
        done
        return 0
}
259
# Usage: label_to_device <label>
# Prints canonical device path.
label_to_device ()
{
        local path=/dev/disk/by-label/$1

        # Prefer the udev by-label symlink; fall back to the ldev.conf
        # device mapping when no symlink exists.
        if [ -h $path ]; then
                readlink --canonicalize $path
        else
                $LDEV -d $1
        fi
}
273
274 # helper for mountpt_is_active() and device_is_active()
275 declare -r awkprog='BEGIN {rc = 1;}
276                         { if ($field == path) {rc = 0;} }
277                     END { exit rc;}'
278
# Usage: mountpt_is_active <label>
# Return 1 (inactive) on invalid label.
mountpt_is_active ()
{
        local dir=`label_to_mountpt $1`

        # Unknown labels have no mount point and are reported inactive.
        [ -n "$dir" ] || return 1

        # Field 2 of /proc/mounts is the mount point.
        awk "$awkprog" field=2 path=$dir < /proc/mounts
}
292
# Usage: device_is_active <label>
# Return 1 (inactive) on invalid label.
# Fix: the awk match now uses $dev; the original passed path=$dir, but
# $dir is never set in this function, so an active device was never
# detected (stop_services' "still active" double check was a no-op).
device_is_active ()
{
        local dev=`label_to_device $1`
        local result=1

        if [ -n "$dev" ]; then
                # Field 1 of /proc/mounts is the device.
                cat /proc/mounts | awk "$awkprog" field=1 path=$dev
                result=$?
        fi
        return $result
}
306
# Usage: mount_one_device <label> <successflag> [devtype]
# Remove <successflag> on error (trick to detect errors after parallel runs).
mount_one_device ()
{
        local label=$1
        local successflag=$2
        local devtype=$3
        local dev=`label_to_device $label`
        local dir=`label_to_mountpt $label`

        # $dir and $dev have already been validated by the caller.
        if [ ! -d $dir ]; then
                if ! mkdir -p $dir; then
                        rm -f $successflag
                        return
                fi
        fi

        echo "Mounting $dev on $dir"
        mount -t lustre $MOUNT_OPTIONS $dev $dir || rm -f $successflag
}
328
# Usage: assemble_md_device <device> [raidtab]
# Assemble the md device backing device.
# Return 0 if the array is assembled successfully or was already active,
# otherwise return error code from mdadm.
assemble_md_device ()
{
        local dev=$1
        local raidtab=$2
        local opts="-Aq"
        local rc=0

        # Use an explicit config file when one is mapped in ldev.conf.
        if [ -n "$raidtab" ]; then
                opts="$opts -c $raidtab"
        fi

        if ! md_array_is_active $dev; then
                mdadm $opts $dev
                rc=$?
        fi

        # Let udev (re)create the device nodes/symlinks for the array.
        udev_trigger
        return $rc
}
352
# Usage: stop_md_device <device> [raidtab]
# Stop the md device backing device.
# Return 0 if the array is stopped successfully or was not active,
# otherwise return error code from mdadm.
stop_md_device ()
{
        local dev=$1
        local raidtab=$2
        local opts="-Sq"
        local rc=0

        # Use an explicit config file when one is mapped in ldev.conf.
        if [ -n "$raidtab" ]; then
                opts="$opts -c $raidtab"
        fi

        # Only stop arrays that exist and are currently active.
        if [ -e $dev ] && md_array_is_active $dev; then
                mdadm $opts $dev
                rc=$?
        fi

        return $rc
}
375
# Usage: md_array_is_active <device>
# return 0 if device is an active md RAID array, or 1 otherwise
md_array_is_active ()
{
        local device=$1

        [ -e "$device" ] || return 1

        # mdadm --detail -t exits with 4 when the device is not an
        # active array; any other status is treated as "active".
        mdadm --detail -t $device > /dev/null 2>&1
        [ $? -ne 4 ]
}
390
# Usage: start_services <label> [ <label> ... ]
# fsck and mount any devices listed as arguments (in parallel).
# Attempt to assemble software raid arrays or zfs pools backing
# Lustre devices.
# Returns 0 on success, 2 if any label failed to validate, assemble,
# pass mmp/fsck checks, or mount.
start_services ()
{
        local result=0
        local devices=""
        local dir dev label
        local successflag
        local labels

        # NOTE(review): devtype/journal/raidtab below are not declared
        # local and leak into the global scope — confirm before changing.
        start_zfs_services
        for label in $*; do
                dir=`label_to_mountpt $label`
                devtype=`$LDEV -t $label`
                dev=`label_to_device $label`
                journal=`$LDEV -j $label`
                raidtab=`$LDEV -r $label`

                # A label with no mount point or device mapping is invalid.
                if [ -z "$dir" ] || [ -z "$dev" ]; then
                        echo "$label is not a valid lustre label on this node"
                        result=2
                        continue
                fi

                # Bring up the backing storage first (md array or zpool).
                if [ "$devtype" = "md" ] ; then
                        if ! assemble_md_device $dev $raidtab ; then
                                echo "failed to assemble array $dev backing $label"
                                result=2
                                continue
                        fi
                elif [ "$devtype" = "zfs" ] ; then
                        if ! import_zpool $label ; then
                                result=2
                        fi
                fi

                # Journal device field in ldev.conf may be "-" or empty,
                # so only attempt to assemble if its an absolute path.
                # Ignore errors since the journal device may not be an
                # md device.
                if echo $journal | grep -q ^/ ; then
                        assemble_md_device $journal $raidtab 2>/dev/null
                fi

                # The mmp/timeout/active checks only apply to ldiskfs-style
                # block devices, not zfs datasets.
                if [ "x$devtype" != "xzfs" ] ; then
                        if mountpt_is_active $label || \
                           device_is_active $label; then
                                echo "$label is already mounted"
                                # no error
                                continue
                        fi
                        if ! mmp_test $dev; then
                                result=2
                                continue
                        fi
                        if ! adjust_scsi_timeout $dev; then
                                result=2
                                continue
                        fi
                fi
                devices="$devices $dev"
                labels="$labels $label"
        done
        # Only proceed to fsck/mount when every label prepared cleanly.
        if [ $result == 0 ]; then
                fsck_test $devices || return 2

                # Fork to handle multiple mount_one_device()'s in parallel.
                # Errors occurred if $successflag comes up missing afterwards.
                successflag=`mktemp`
                [ -e $successflag ] || return 2
                for label in $labels; do
                        mount_one_device $label $successflag `$LDEV -t $label` &
                        # stagger to avoid module loading races
                        if [[ -n $MOUNT_DELAY && $MOUNT_DELAY -gt 0 ]] ; then
                                sleep $MOUNT_DELAY
                        fi
                done
                # Wait for every forked mount to finish before checking.
                for label in $labels; do
                        wait
                done
                [ -e $successflag ] || return 2
                rm -f $successflag
        fi

        return $result
}
479
# Usage: stop_services <label> [ <label> ... ]
# Unmount any devices listed as arguments (serially).
# Any devices which are not mounted or don't exist are skipped with no error.
# Returns 0 on success, 2 if a label is invalid, an unmount fails, or a
# mount point/device is still active afterwards.
stop_services ()
{
        local labels=$*
        local result=0
        local dir dev label

        for label in $labels; do
                dir=`label_to_mountpt $label`
                if [ -z "$dir" ]; then
                        echo "$label is not a valid lustre label on this node"
                        result=2
                        continue
                fi
                # Quietly skip anything that is not mounted.
                mountpt_is_active $label || continue
                echo "Unmounting $dir"
                umount $dir || result=2
        done

        # Double-check that every mount point and device is really gone.
        for label in $labels; do
                if mountpt_is_active $label; then
                        dir=`label_to_mountpt $label`
                        echo "Mount point $dir is still active"
                        result=2
                fi
                if device_is_active $label; then
                        dev=`label_to_device $label`
                        echo "Device $dev is still active"
                        result=2
                fi
        done

        # Finally tear down the backing devices (zpools, md arrays).
        stop_devices $labels

        return $result
}
521
# Usage: start_lustre_services [local|foreign|all|<label>]
# If no parameter is specified, local devices will be started.
start_lustre_services ()
{
        local labels=""

        case "$1" in
        ""|local)  labels=$LOCAL_SRV ;;
        foreign)   labels=$FOREIGN_SRV ;;
        all)       labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        *)         labels="$1" ;;
        esac

        # for use by heartbeat V1 resource agent:
        # starting an already-started service must not be an error
        start_services $labels || exit 2
}
544
# Usage: stop_lustre_services [local|foreign|all|<label>]
# If no parameter is specified all devices will be stopped.
stop_lustre_services ()
{
        local labels=""

        case "$1" in
        local)     labels=$LOCAL_SRV ;;
        foreign)   labels=$FOREIGN_SRV ;;
        ""|all)    labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        *)         labels="$1" ;;
        esac

        # for use by heartbeat V1 resource agent:
        # stopping already-stopped service must not be an error
        stop_services $labels || exit 2
}
566
# General lustre health check - not device specific.
# Prints a one-word state (stopped/loaded/partial/running/disconnected/
# recovery/unhealthy/LBUG) on stdout and returns the matching LSB-ish code.
health_check ()
{
        # Preserve the caller's nullglob setting; the globs below must
        # stay literal when they do not match.
        old_nullglob="`shopt -p nullglob`"
        shopt -u nullglob

        STATE="stopped"
        # LSB compliance - return 3 if service is not running
        # Lustre-specific returns
        # 150 - partial startup
        # 151 - health_check unhealthy
        # 152 - LBUG
        RETVAL=3
        egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"

        # check for any configured devices (may indicate partial startup)
        if [ -d /proc/fs/lustre ]; then
                if [ -n "`cat /proc/fs/lustre/devices 2> /dev/null`" ] ; then
                        STATE="partial"
                        RETVAL=150
                fi

                # check for either a server or a client filesystem
                MDT="`ls /proc/fs/lustre/mdt/*/recovery_status 2> /dev/null`"
                OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status \
                        2> /dev/null`"
                LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
                if [ "$MDT" -o "$OST" -o "$LLITE" ]; then
                        STATE="running"
                        RETVAL=0
                fi
        else
                # check if this is a router
                if [ -d /proc/sys/lnet ]; then
                        ROUTER="`cat /proc/sys/lnet/routes | head -1 |
                                grep -i -c \"Routing enabled\"`"
                        if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
                                STATE="running"
                                RETVAL=0
                        fi
                fi
        fi

        # check for server disconnections
        DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
        if [ -n "$DISCON" ] ; then
                STATE="disconnected"
                RETVAL=0
        fi

        # check for servers in recovery
        # NOTE(review): MDT/OST are only assigned in the /proc/fs/lustre
        # branch above; the -n guard keeps this safe on router-only nodes.
        if [ -n "$MDT$OST" ] && grep -q RECOV $MDT $OST ; then
                STATE="recovery"
                RETVAL=0
        fi

        # check for error in health_check
        HEALTH="/proc/fs/lustre/health_check"
        if [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH ; then
                STATE="unhealthy"
                RETVAL=1
        fi

        # check for LBUG
        if [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH ; then
                STATE="LBUG"
                RETVAL=152
        fi

        # report the state and restore the caller's nullglob setting
        echo $STATE
        eval $old_nullglob
        return $RETVAL
}
640
# Usage: status [local|foreign|all|<label>]
# If no parameter is specified, general lustre health status will be reported.
# Exits 0 when anything is running, 3 when stopped (LSB "not running").
status ()
{
        local labels=""
        local label dev
        local valid_devs=0

        case "$1" in
        local)    labels=$LOCAL_SRV ;;
        foreign)  labels=$FOREIGN_SRV ;;
        all)      labels="$LOCAL_SRV $FOREIGN_SRV" ;;
        "")       # ASSUMPTION: this is not the heartbeat res agent
                  health_check
                  exit $? ;;
        *)        labels=$1 ;;
        esac

        # for use by heartbeat V1 resource agent:
        # print "running" if *anything* is running.
        for label in $labels; do
                dev=`label_to_device $label`
                if [ -z "$dev" ]; then
                        echo "$label is not a valid lustre label on this node"
                        # no error
                        continue
                fi
                valid_devs=1
                if mountpt_is_active $label || device_is_active $label; then
                        echo "running"
                        exit 0
                fi
        done
        [ $valid_devs == 1 ] && echo "stopped"
        exit 3
}
682
# Print a short usage summary and exit 1 (invalid usage).
usage ()
{
        printf '%s\n' \
                "Usage: lustre {start|stop|status|restart|reload|condrestart}" \
                "" \
                "       lustre start  [local|foreign|<label>]" \
                "       lustre stop   [local|foreign|<label>]" \
                "       lustre status [local|foreign|<label>]"
        exit 1
}
694
# See how we were called.
case "$1" in
  start)
        # Optional 2nd arg: local|foreign|all|<label> (default: local).
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        run_preexec_check "start"
        start_lustre_services $2
        run_postexec_check "start"
        ;;
  stop)
        # Optional 2nd arg: local|foreign|all|<label> (default: all).
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        run_preexec_check "stop"
        stop_lustre_services $2
        run_postexec_check "stop"
        ;;
  status)
        if [ $# -gt 2 ] ; then
                echo "ERROR: Too many arguments."
                usage
        fi
        status $2
        ;;
  restart)
        # Re-invoke ourselves so stop and start each run a full cycle.
        $0 stop
        $0 start
        ;;
  reload)
        # Nothing to reload; accepted for init-script compatibility.
        ;;
  probe)
        # chkconfig "probe: true" support; nothing to do.
        ;;
  condrestart)
        # Restart only if any lustre filesystem is currently mounted.
        if grep lustre /proc/mounts ; then
                $0 stop
                $0 start
        fi
        ;;
  *)
        usage
esac

exit 0