3 # lustre This shell script takes care of starting and stopping
7 # description: Part of the lustre file system.
9 # config: /etc/sysconfig/lustre
11 # Source function library.
12 . /etc/rc.d/init.d/functions
14 # Source networking configuration.
15 if [ ! -f /etc/sysconfig/network ]; then
19 . /etc/sysconfig/network
21 LDEV=${LDEV:-"/usr/sbin/ldev"}
22 ZPOOL_LAYOUT=/usr/bin/zpool_layout
23 UDEVADM=${UDEVADM:-/sbin/udevadm}
25 # Check that networking is up.
26 [ "${NETWORKING}" = "no" ] && exit 0
28 # Check for and source configuration file otherwise set defaults
29 [ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
30 FSCK_ARGS=${FSCK_ARGS:-""}
31 MOUNT_OPTIONS=${MOUNT_OPTIONS:-""}
32 LOCAL_SRV=${LOCAL_SRV:-"`$LDEV -l 2>/dev/null`"}
33 FOREIGN_SRV=${FOREIGN_SRV:-"`$LDEV -f 2>/dev/null`"}
34 REQUIRE_MMP_FEATURE=${REQUIRE_MMP_FEATURE:-${FOREIGN_SRV:+"yes"}}
35 LOCAL_MOUNT_DIR=${LOCAL_MOUNT_DIR:-"/mnt/lustre/local"}
36 FOREIGN_MOUNT_DIR=${FOREIGN_MOUNT_DIR:-"/mnt/lustre/foreign"}
37 SETUP_DEVICES=${SETUP_DEVICES:-""}
38 ZPOOL_LAYOUT_BUSES=${ZPOOL_LAYOUT_BUSES:-""}
39 ZPOOL_LAYOUT_PORTS=${ZPOOL_LAYOUT_PORTS:-""}
40 ZPOOL_LAYOUT_MAP=${ZPOOL_LAYOUT_MAP:-""}
41 MOUNT_DELAY=${MOUNT_DELAY:-2}
42 LOAD_ZFS=${LOAD_ZFS:-""}
48 if [ -n "$ZPOOL_LAYOUT_BUSES" -a -n "$ZPOOL_LAYOUT_PORTS" ] ; then
49 MAP_ARG=${ZPOOL_LAYOUT_MAP:+"-m $ZPOOL_LAYOUT_MAP"}
50 $ZPOOL_LAYOUT -t -b "$ZPOOL_LAYOUT_BUSES" \
51 -p "$ZPOOL_LAYOUT_PORTS" $MAP_ARG
53 if [ "$LOAD_ZFS" = "yes" ] && ! modprobe zfs ; then
54 echo "Failed to load zfs module. Aborting."
64 for label in $labels; do
65 devtype=`$LDEV -t $label`
66 if [ "$devtype" = "zfs" ] ; then
68 elif [ "$devtype" = "md" ] ; then
69 dev=`label_to_device $label`
70 journal=`$LDEV -j $label`
72 stop_md_device $journal
81 local pool=`$LDEV -z $label`
82 local args="-N $ZPOOL_IMPORT_ARGS"
83 local cache=`$LDEV -r $label`
84 # -c is incompatible with -d
85 if [ -n "$cache" ] ; then
86 args="$args -c $cache"
87 elif [ -n "$ZPOOL_IMPORT_DIR" ] ; then
88 args="$args -d $ZPOOL_IMPORT_DIR"
89 elif [ -d "/dev/disk/by-vdev" ] ; then
90 args="$args -d /dev/disk/by-vdev"
91 elif [ -d "/dev/mapper" ] ; then
92 args="$args -d /dev/mapper"
95 if zpool status $pool >/dev/null 2>&1 ; then
97 elif [ -n "$pool" ] ; then
98 zpool import $pool $args 2>/dev/null
107 local pool=`$LDEV -z $label`
108 zpool export $pool 2>/dev/null
111 # Trigger udev and wait for it to settle.
114 if [ -x ${UDEVADM} ]; then
115 ${UDEVADM} trigger --action=change --subsystem-match=block
123 # Usage: run_preexec_check [ start | stop | restart | condrestart ]
124 # The single parameter will be passed to the PREEXEC_SCRIPT
127 if [ -n "$PREEXEC_CHECK" ] && ! $PREEXEC_CHECK ; then
128 echo "Pre-exec check \"$PREEXEC_CHECK\" failed. Aborting."
132 if [ -n "$PREEXEC_SCRIPT" ] && ! "$PREEXEC_SCRIPT" "$1" ; then
133 echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed. Aborting."
138 # Usage: run_postexec_check [ start | stop | restart | condrestart ]
139 # The single parameter will be passed to the POSTEXEC_SCRIPT
140 run_postexec_check ()
142 if [ -n "$POSTEXEC_CHECK" ] && ! $POSTEXEC_CHECK ; then
143 echo "Post-exec check \"$POSTEXEC_CHECK\" failed. Aborting."
147 if [ -n "$POSTEXEC_SCRIPT" ] && ! "$POSTEXEC_SCRIPT" "$1" ; then
148 echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed. Aborting."
153 # Usage: adjust_scsi_timeout <dev>
154 adjust_scsi_timeout ()
158 if [ -n "$SCSI_DEVICE_TIMEOUT" ]; then
159 # make sure that it is actually a SCSI (sd) device
160 local name=`basename $dev`
161 local proc=/sys/block/${name}/device/timeout
162 local driver=`readlink /sys/block/${name}/device/driver`
163 if [ -n "$driver" ] && [ "`basename $driver`" == "sd" ]; then
164 if ! echo $SCSI_DEVICE_TIMEOUT >$proc; then
165 echo "FAILED: could not adjust ${dev} timeout"
173 # Usage: fsck_test <dev> [ <dev> ... ]
174 # Checks all devices in parallel if FSCK_ARGS is set.
179 # Filter out non-absolute paths, which are probably ZFS datasets
180 devices=`echo $devices |xargs -n 1|grep '^/'|xargs`
182 if [ -n "${FSCK_ARGS}" -a -n "$devices" ]; then
183 if [ -x /sbin/@PFSCK@ ] ; then
184 echo "@PFSCK@ $devices -- ${FSCK_ARGS}"
185 /sbin/@PFSCK@ $devices -- ${FSCK_ARGS}
186 if [ $? -ne 0 -a $? -ne 1 ] ; then
187 echo "FAILED: @PFSCK@ -- ${FSCK_ARGS}: $?"
191 echo "/sbin/@PFSCK@ not found"
198 # Usage: test_feature_flag <dev> <flag>
206 for feature in `/sbin/@TUNE2FS@ -l $dev 2>/dev/null \
207 | grep features: | sed -e 's/^.*: //'`; do
208 if [ "$feature" == "$flag" ]; then
217 # Usage: mmp_test <dev>
218 # Returns 0 if it is set or not required, 1 if unset and required or error.
224 if [ "$REQUIRE_MMP_FEATURE" == "yes" ]; then
225 if [ -x /sbin/@TUNE2FS@ ]; then
226 if ! test_feature_flag $dev "mmp"; then
227 echo "mmp feature flag is not set on $dev"
231 echo "/sbin/@TUNE2FS@ not found"
239 # Usage: label_to_mountpt <label>
240 # Prints mount point path, if label matches a local or foreign server.
246 for serv in $LOCAL_SRV; do
247 if [ "$serv" == "$label" ]; then
248 echo "$LOCAL_MOUNT_DIR/$label"
252 for serv in $FOREIGN_SRV; do
253 if [ "$serv" == "$label" ]; then
254 echo "$FOREIGN_MOUNT_DIR/$label"
260 # Usage: label_to_device <label>
261 # Prints canonical device path.
265 local path=/dev/disk/by-label/$label
267 if [ -h $path ] ; then
268 readlink --canonicalize $path
274 # helper for mountpt_is_active() and device_is_active()
275 declare -r awkprog='BEGIN {rc = 1;}
276 { if ($field == path) {rc = 0;} }
279 # Usage: mountpt_is_active <label>
280 # Return 1 (inactive) on invalid label.
283 local dir=`label_to_mountpt $1`
286 if [ -n "$dir" ]; then
287 cat /proc/mounts | awk "$awkprog" field=2 path=$dir
293 # Usage: device_is_active <label>
294 # Return 1 (inactive) on invalid label.
297 local dev=`label_to_device $1`
300 if [ -n "$dev" ]; then
301 cat /proc/mounts | awk "$awkprog" field=1 path=$dir
307 # Usage: mount_one_device <label> <successflag> [devtype]
308 # Remove <successflag> on error (trick to detect errors after parallel runs).
314 local dev=`label_to_device $label`
315 local dir=`label_to_mountpt $label`
317 # $dir and $dev have already been checked at this point
318 if [ ! -d $dir ] && ! mkdir -p $dir; then
322 echo "Mounting $dev on $dir"
323 if ! mount -t lustre $MOUNT_OPTIONS $dev $dir; then
329 # Usage: assemble_md_device <device>
330 # Assemble the md device backing device.
331 # Return 0 if the array is assembled successfully or was already active,
332 # otherwise return error code from mdadm.
333 assemble_md_device ()
340 if [ -n "$raidtab" ] ; then
341 args="$args -c $raidtab"
344 if ! md_array_is_active $dev ; then
353 # Usage: stop_md_device <device>
354 # Stop the md device backing device.
355 # Return 0 if the array is stopped successfully or was not active,
356 # otherwise return error code from mdadm.
364 if [ -n "$raidtab" ] ; then
365 args="$args -c $raidtab"
368 if [ -e $dev ] && md_array_is_active $dev ; then
376 # Usage: md_array_is_active <device>
377 # return 0 if device is an active md RAID array, or 1 otherwise
378 md_array_is_active ()
382 [ -e "$device" ] || return 1
384 mdadm --detail -t $device > /dev/null 2>&1
385 if [ $? -eq 4 ] ; then
391 # Usage: start_services <label> [ <label> ... ]
392 # fsck and mount any devices listed as arguments (in parallel).
393 # Attempt to assemble software raid arrays or zfs pools backing
405 dir=`label_to_mountpt $label`
406 devtype=`$LDEV -t $label`
407 dev=`label_to_device $label`
408 journal=`$LDEV -j $label`
409 raidtab=`$LDEV -r $label`
411 if [ -z "$dir" ] || [ -z "$dev" ]; then
412 echo "$label is not a valid lustre label on this node"
417 if [ "$devtype" = "md" ] ; then
418 if ! assemble_md_device $dev $raidtab ; then
419 echo "failed to assemble array $dev backing $label"
423 elif [ "$devtype" = "zfs" ] ; then
424 if ! import_zpool $label ; then
429 # Journal device field in ldev.conf may be "-" or empty,
430 # so only attempt to assemble if it's an absolute path.
431 # Ignore errors since the journal device may not be an
433 if echo $journal | grep -q ^/ ; then
434 assemble_md_device $journal $raidtab 2>/dev/null
437 if [ "x$devtype" != "xzfs" ] ; then
438 if mountpt_is_active $label || \
439 device_is_active $label; then
440 echo "$label is already mounted"
444 if ! mmp_test $dev; then
448 if ! adjust_scsi_timeout $dev; then
453 devices="$devices $dev"
454 labels="$labels $label"
456 if [ $result == 0 ]; then
457 fsck_test $devices || return 2
459 # Fork to handle multiple mount_one_device()'s in parallel.
460 # Errors occurred if $successflag comes up missing afterwards.
462 [ -e $successflag ] || return 2
463 for label in $labels; do
464 mount_one_device $label $successflag `$LDEV -t $label` &
465 # stagger to avoid module loading races
466 if [[ -n $MOUNT_DELAY && $MOUNT_DELAY -gt 0 ]] ; then
470 for label in $labels; do
473 [ -e $successflag ] || return 2
480 # Usage: stop_services <label> [ <label> ... ]
481 # Unmount any devices listed as arguments (serially).
482 # Any devices which are not mounted or don't exist are skipped with no error.
489 for label in $labels; do
490 dir=`label_to_mountpt $label`
491 if [ -z "$dir" ]; then
492 echo "$label is not a valid lustre label on this node"
496 if ! mountpt_is_active $label; then
497 #echo "$label is not mounted"
501 echo "Unmounting $dir"
502 umount $dir || result=2
505 for label in $labels; do
506 if mountpt_is_active $label; then
507 dir=`label_to_mountpt $label`
508 echo "Mount point $dir is still active"
511 if device_is_active $label; then
512 dev=`label_to_device $label`
513 echo "Device $dev is still active"
522 # Usage: start_lustre_services [local|foreign|all|<label>]
523 # If no parameter is specified, local devices will be started.
524 start_lustre_services ()
535 all) labels="$LOCAL_SRV $FOREIGN_SRV"
540 # for use by heartbeat V1 resource agent:
541 # starting an already-started service must not be an error
542 start_services $labels || exit 2
545 # Usage: stop_lustre_services [local|foreign|all|<label>]
546 # If no parameter is specified all devices will be stopped.
547 stop_lustre_services ()
552 local) labels=$LOCAL_SRV
557 ""|all) labels="$LOCAL_SRV $FOREIGN_SRV"
562 # for use by heartbeat V1 resource agent:
563 # stopping already-stopped service must not be an error
564 stop_services $labels || exit 2
567 # General lustre health check - not device specific.
570 old_nullglob="`shopt -p nullglob`"
574 # LSB compliance - return 3 if service is not running
575 # Lustre-specific returns
576 # 150 - partial startup
577 # 151 - health_check unhealthy
580 egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
582 # check for any configured devices (may indicate partial startup)
583 if [ -d /proc/fs/lustre ]; then
584 if [ -n "`cat /proc/fs/lustre/devices 2> /dev/null`" ] ; then
589 # check for either a server or a client filesystem
590 MDT="`ls /proc/fs/lustre/mdt/*/recovery_status 2> /dev/null`"
591 OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status \
593 LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
594 if [ "$MDT" -o "$OST" -o "$LLITE" ]; then
599 # check if this is a router
600 if [ -d /proc/sys/lnet ]; then
601 ROUTER="`cat /proc/sys/lnet/routes | head -1 |
602 grep -i -c \"Routing enabled\"`"
603 if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
610 # check for server disconnections
611 DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
612 if [ -n "$DISCON" ] ; then
617 # check for servers in recovery
618 if [ -n "$MDT$OST" ] && grep -q RECOV $MDT $OST ; then
623 # check for error in health_check
624 HEALTH="/proc/fs/lustre/health_check"
625 if [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH ; then
631 if [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH ; then
641 # Usage: status [local|foreign|all|<label>]
642 # If no parameter is specified, general lustre health status will be reported.
650 local) labels=$LOCAL_SRV;
655 all) labels="$LOCAL_SRV $FOREIGN_SRV"
657 "") # ASSUMPTION: this is not the heartbeat res agent
664 # for use by heartbeat V1 resource agent:
665 # print "running" if *anything* is running.
666 for label in $labels; do
667 dir=`label_to_device $label`
668 if [ -z "$dir" ]; then
669 echo "$label is not a valid lustre label on this node"
674 if mountpt_is_active $label || device_is_active $label; then
679 [ $valid_devs == 1 ] && echo "stopped"
686 Usage: lustre {start|stop|status|restart|reload|condrestart}
688 lustre start [local|foreign|<label>]
689 lustre stop [local|foreign|<label>]
690 lustre status [local|foreign|<label>]
695 # See how we were called.
698 if [ $# -gt 2 ] ; then
699 echo "ERROR: Too many arguments."
702 run_preexec_check "start"
703 start_lustre_services $2
704 run_postexec_check "start"
707 if [ $# -gt 2 ] ; then
708 echo "ERROR: Too many arguments."
711 run_preexec_check "stop"
712 stop_lustre_services $2
713 run_postexec_check "stop"
716 if [ $# -gt 2 ] ; then
717 echo "ERROR: Too many arguments."
731 if grep lustre /proc/mounts ; then