/usr/sbin/ldev - list devices, determine validity, etc.
/usr/sbin/lhbadm - wrapper for heartbeat utils for failover/failback/status
/etc/ha.d/resource.d/Lustre - heartbeat resource agent (wraps init script)
/etc/init.d/lustre - lustre init script
/etc/init.d/lnet - lnet init script
/usr/sbin/haconfig - helper script for building heartbeat config files
The scripts use two configuration files:
/etc/ldev.conf - maps hostnames to failover partners, devices, and labels
/etc/nids - hostnames to lustre NIDS
In addition to heartbeat support, the ldev script enables parallel
execution of commands against all luns configured on a server. The
lustre init script supports devices backed by Linux software RAID, ZFS,
or traditional block devices.
NOTE: these scripts presume the udev rules for persistent block device
naming are in place, in particular that lustre labels can be mapped to
block devices in /dev/disk/by-id.
Change-Id: I8391744ce6eed989c061f131aca4a2da7b5d51b2
Signed-off-by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-on: http://review.whamcloud.com/290
Reviewed-by: Doug Oucharek <doug@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Tested-by: Hudson
Reviewed-by: Oleg Drokin <green@whamcloud.com>
%attr(-, root, root) %{_libexecdir}/lustre/lc_common
%attr(-, root, root) %{_sysconfdir}/udev/rules.d/99-lustre.rules
+
+%attr(-, root, root) %{_sysconfdir}/init.d/lnet
+%attr(-, root, root) %{_sysconfdir}/init.d/lustre
+%attr(-, root, root) %{_sysconfdir}/ldev.conf
+%attr(-, root, root) %{_sysconfdir}/sysconfig/lustre
+%attr(-, root, root) %{_libexecdir}/lustre/haconfig
+%attr(-, root, root) %{_sysconfdir}/ha.d/resource.d/Lustre
EOF
if [ -f $RPM_BUILD_ROOT%{_libdir}/libcfsutil.a ] ; then
lustre/quota/Makefile
lustre/quota/autoMakefile
lustre/scripts/Makefile
+lustre/scripts/lustre
lustre/tests/Makefile
lustre/tests/mpi/Makefile
lustre/utils/Makefile
#
EXTRA_DIST = lustre.dtd slapd-lustre.conf lustre2ldif.xsl top.ldif \
- 99-lustre.rules
+ 99-lustre.rules lustre ldev.conf
ldapconfdir = $(sysconfdir)/openldap
if UTILS
udevrulesdir = $(sysconfdir)/udev/rules.d
udevrules_DATA = 99-lustre.rules
+
+sysconfigdir = $(sysconfdir)/sysconfig
+sysconfig_DATA = lustre
+
+sysconf_DATA = ldev.conf
--- /dev/null
+# example /etc/ldev.conf
+#
+#local foreign/- label [md:|zfs:]device-path [journal-path]/- [raidtab]
+#
+#zeno-mds1 - zeno-MDT0000 zfs:lustre-zeno-mds1/mdt1
+#
+#zeno1 zeno5 zeno-OST0000 zfs:lustre-zeno1/ost1
+#zeno2 zeno6 zeno-OST0001 zfs:lustre-zeno2/ost1
+#zeno3 zeno7 zeno-OST0002 zfs:lustre-zeno3/ost1
+#zeno4 zeno8 zeno-OST0003 zfs:lustre-zeno4/ost1
+#zeno5 zeno1 zeno-OST0004 zfs:lustre-zeno5/ost1
+#zeno6 zeno2 zeno-OST0005 zfs:lustre-zeno6/ost1
+#zeno7 zeno3 zeno-OST0006 zfs:lustre-zeno7/ost1
+#zeno8 zeno4 zeno-OST0007 zfs:lustre-zeno8/ost1
--- /dev/null
+# Configuration options for /etc/init.d/lustre
+
+# The command in PREEXEC_SCRIPT is run before starting services. Its first
+# parameter is mode of the init script (start|restart|condrestart). If the
+# command has a non-zero return code, then the init script will abort without
+# taking any action.
+#PREEXEC_SCRIPT="/usr/bin/somescript"
+
+# The command in PREEXEC_CHECK is run before starting services. It is not
+# passed any arguments. If the command has a non-zero return code, then the
+# init script will abort without taking any action.
+#PREEXEC_CHECK="command"
+
+# The commands in POSTEXEC_SCRIPT and/or POSTEXEC_CHECK are run after starting
+# services. If the command has a non-zero return code, then the init script
+# will terminate with an exit status of 1.
+#POSTEXEC_SCRIPT="/usr/bin/somescript"
+#POSTEXEC_CHECK="command"
+
+# If SCSI_DEVICE_TIMEOUT is set, its value is echoed into
+# /sys/block/sdXX/device/timeout
+# before checking file systems or starting Lustre
+#SCSI_DEVICE_TIMEOUT=60
+
+# LOCAL_SRV or FOREIGN_SRV can be set to a space-delimited list of
+# labels that will be mounted as local and foreign (failover) lustre services.
+# If unset or null, /etc/ldev.conf establishes the labels for these services.
+#LOCAL_SRV="`shopt -s nullglob && cd /dev/disk/by-label 2>/dev/null && echo *-OST* *-MDT* *MGS* *MGT*`"
+
+# Before mounting any lustre backend devices, the init script will
+# run pfsck.ldiskfs only if the following FSCK_ARGS variable is a
+# non-empty string. There are no default options for this fsck.
+# The command takes the form:
+#
+# /sbin/pfsck.ldiskfs $devices -- ${FSCK_ARGS}
+#
+#FSCK_ARGS="-p"
+
+# Uncomment to insert server mount options - see mount.lustre(8)
+#MOUNT_OPTIONS="-o abort_recov"
+
+# Stagger mounts by MOUNT_DELAY seconds to avoid possible module loading races
+# due to multiple mount commands running in parallel. This obviously does not
+# eliminate the race but provides a safety buffer. The default is 2 seconds.
+# Set to 0 or empty string to disable staggering of mounts.
+#MOUNT_DELAY=0
+
+# Uncomment to disable the check for the mmp ldiskfs feature (only
+# applies if foreign devices are configured).
+# REQUIRE_MMP_FEATURE=no
+
+# Override default mount points for lustre services
+#LOCAL_MOUNT_DIR=/mnt/lustre/local
+#FOREIGN_MOUNT_DIR=/mnt/lustre/foreign
+
+# Uncomment to cause the lustre init scripts to explicitly modprobe the zfs
+# module when starting services. The zfs module is normally loaded
+# automatically by the zfs command line utilities, for example when the zpool
+# is imported.
+#LOAD_ZFS="yes"
+
+# Uncomment to pass additional arguments to 'zpool import'. For example,
+# the -m option can be used to allow the pool to be imported even if it is
+# missing a non-critical log device.
+#ZPOOL_IMPORT_ARGS="-m"
+
+# Uncomment to force ZFS to import the pool using the device names in the
+# given directory. By default, the /dev/disk/by-vdev/ device names will be
+# used if they are configured followed by the /dev/mapper device names.
+#ZPOOL_IMPORT_DIR="/dev/disk/by-id"
plot-llstat.8 l_getgroups.8 lst.8 routerstat.8 lshowmount.8 \
ll_recover_lost_found_objs.8 llog_reader.8 llapi_file_open.3 \
llapi_file_create.3 llapi_file_get_stripe.3 liblustreapi.7 \
- lustre_rsync.8 lfs_migrate.1
+ lustre_rsync.8 lfs_migrate.1 lhbadm.8 ldev.8 ldev.conf.5 nids.5
if UTILS
man_MANS = $(MANFILES)
--- /dev/null
+.TH ldev 8 Lustre ldev ldev
+.SH NAME
+ldev \- lustre device utility
+.SH SYNOPSIS
+.B "ldev [OPTIONS]"
+.br
+.SH DESCRIPTION
+.B ldev
+can be used to query information about lustre devices configured in
+/etc/ldev.conf. It is used by the lustre init script.
+.SH OPTIONS
+.B ldev
+accepts the following options:
+.TP
+.I "-h, --help"
+Display help message.
+.TP
+.I "-c, --config FILE"
+Set path to config file.
+.TP
+.I "-H, --hostname NAME"
+Use NAME instead of local hostname for queries.
+.TP
+.I "-p, --partner"
+Print hostname of failover partner.
+.TP
+.I "-l, --local"
+Print labels for local devices.
+.TP
+.I "-f, --foreign"
+Print labels for foreign devices.
+.TP
+.I "-a, --all"
+Print labels for local and foreign devices.
+.TP
+.I "-s, --sanity"
+Sanity check config on this node.
+If any expected local or foreign devices are not present, print an error.
+If devices do not contain the expected labels, print an error.
+.TP
+.I "-d, --device LABEL"
+Print storage device of label.
+.TP
+.I "-j, --journal LABEL"
+Print journal device corresponding to label if defined.
+.TP
+.I "-r, --raidtab LABEL"
+Print Linux software raid configuration file or ZFS cache file associated with
+LABEL, if any. Using non-default names for these files may help prevent arrays
+from being automatically started by the system. This is important in failover
+configurations where the timing of device initialization must be strictly
+controlled.
+.TP
+.I "-t, --type LABEL"
+Print device type of LABEL, i.e. "zfs" or "md".
+.TP
+.I "-z, --zpool LABEL"
+Print zpool containing LABEL.
+.TP
+.I "CMD [ARGS...]"
+Run one instance of \fICMD [ARGS]\fR for each label in parallel.
+Only the local labels are used by default, but foreign or all labels
+may be selected by adding the \fI--foreign\fR or \fI--all\fR options.
+The following substitutions are made:
+%f=fsname, %d=device, %j=journal, %i=index, %I=hex-index, %t=type, %l=label,
+%n=nid, %N=failnid. On failure of any child processes, \fBldev\fR will
+return a non-zero exit code.
+.LP
+It is an error if %n or %N is used in a command and /etc/nids does not
+contain appropriate host to NID mappings.
+.SH EXAMPLES
+To run a preen check on all devices in a cluster in parallel:
+.IP
+.nf
+pdsh -S -g ost ldev fsck.ldiskfs -p %d
+.fi
+.LP
+To re-format an entire file system:
+.IP
+.nf
+#!/bin/bash -xe
+export FANOUT=64
+
+# MDTs
+pdsh -S -g mds service lustre stop
+pdsh -S -g mds ldev "yes \\| mkfs.ldiskfs -q -b4096 \\
+ -Ojournal_dev %j"
+pdsh -S -g mds ldev dd if=/dev/zero of=%d count=8
+pdsh -S -g mds ldev mkfs.lustre --mdt --mgs --fsname=%f \\
+ --index=%i --mkfsoptions=-Jdevice=%j \\
+ --mkfsoptions=-i2048 \\
+ --mountfsoptions=errors=panic,iopen_nopriv,user_xattr,\\
+ maxdirsize=20000000 %d
+pdsh -S -g mds ldev tune.ldiskfs -i0 -m0 -c0 %d
+
+# OSTs
+mgs=172.16.2.200@tcp0
+pdsh -S -g ost service heartbeat stop
+pdsh -S -g ost service lustre stop
+pdsh -S -g ost ldev dd if=/dev/zero of=%d count=8
+pdsh -S -g ost ldev mkfs.lustre --ost --mgsnode=$mgs --fsname=%f \\
+ --index=%i --param=lov.stripecount=2 --failnode=%N \\
+ --mountfsoptions=errors=panic,extents,mballoc %d
+pdsh -S -g ost ldev tune.ldiskfs -epanic -i0 -m0 -c0 %d
+.fi
+.SH FILES
+/etc/ldev.conf
+.br
+/etc/nids
+.SH "SEE ALSO"
+.BR ldev.conf (5)
+.BR nids (5)
--- /dev/null
+.TH ldev.conf 5 Lustre ldev.conf /etc/ldev.conf
+.SH NAME
+/etc/ldev.conf \- lustre device configuration file
+.SH DESCRIPTION
+The ldev.conf file contains a list of Lustre devices used by the
+\fBldev\fR utility.
+.SH FORMAT
+Comments beginning with a hash (#) are ignored. Each line represents one
+device and includes the following information separated by white space:
+.TP
+.I "local hostname"
+The name of the host where the device normally runs.
+.TP
+.I "foreign hostname"
+The name of the host where the device runs when failed over.
+If failover is not used, insert a hyphen as a placeholder.
+.TP
+.I "label"
+The Lustre label associated with the device in the form \fIfsname-SRVnnnn\fR
+where \fIfsname\fR is the file system name, \fISRV\fR is \fBOST\fR or
+\fBMDT\fR, and \fInnnn\fR is the four-digit hex index of the device.
+.TP
+.I "path"
+The path name of the device. In failover configurations it should be available
+on both local and foreign hosts, e.g. use the symlinks maintained by udev
+in \fI/dev/disk/by-id\fR.
+.TP
+.I "journal-path"
+The path name of the journal device, if any. This field may be omitted unless
+the raidtab field is present. If a journal device is not used a hyphen may be
+inserted as a placeholder.
+.TP
+.I "raidtab"
+The path name of a Linux software raid configuration file or ZFS cache file.
+Using non-default names for these files may help prevent arrays from being
+automatically started by the system. This is important in failover
+configurations where the timing of device initialization must be strictly
+controlled. This field may be omitted.
+.SH EXAMPLES
+.nf
+
+#local foreign/- label [md:|zfs:]device-path [journal-path]/- [raidtab]
+
+# ldiskfs on block device example
+tycho-mds1 - lc1-MDT0000 /dev/sda /dev/sdc
+tycho1 tycho5 lc1-OST0000 /dev/disk/by-id/scsi-10103a262891d340100
+tycho1 tycho5 lc1-OST0008 /dev/disk/by-id/scsi-10103a262681d340200
+tycho1 tycho5 lc1-OST0010 /dev/disk/by-id/scsi-10103a2629e1d340300
+tycho5 tycho1 lc1-OST0004 /dev/disk/by-id/scsi-101046e6b401d341100
+tycho5 tycho1 lc1-OST000c /dev/disk/by-id/scsi-101046e6b591d341200
+tycho5 tycho1 lc1-OST0014 /dev/disk/by-id/scsi-101046e6bb41d341300
+
+# ldiskfs on Linux software RAID example
+#local foreign/- label [md:|zfs:]device-path [journal-path]/- [raidtab]
+zwicky-mds1 - zwicky-MDT0000 md:/dev/md0 - /etc/mdadm.conf.mds
+zwicky1 zwicky2 zwicky-OST0000 md:/dev/md0 /dev/md10 /etc/mdadm.conf.oss
+zwicky2 zwicky1 zwicky-OST0001 md:/dev/md1 /dev/md20 /etc/mdadm.conf.oss
+
+# ZFS example
+#local foreign/- label [md:|zfs:]device-path [journal-path]/- [raidtab]
+zeno-mds1 - zeno-MDT0000 zfs:lustre-zeno-mds1/mdt1 - /etc/zfs/zpool.cache.zeno
+zeno1 zeno5 zeno-OST0000 zfs:lustre-zeno1/ost1 - /etc/zfs/zpool.cache.zeno
+zeno5 zeno1 zeno-OST0001 zfs:lustre-zeno5/ost1 - /etc/zfs/zpool.cache.zeno
+
+.fi
+.SH "SEE ALSO"
+.BR ldev (8)
--- /dev/null
+.TH lhbadm 8 "2009 Apr 29" Lustre "System Administration Utilities"
+.SH NAME
+lhbadm \- Lustre failover utility
+.SH SYNOPSIS
+.B lhbadm {failover|failback} reason ...
+.br
+.B pdsh -g lustre lhbadm status | dshbak -c
+.br
+.SH DESCRIPTION
+.B lhbadm
+simplifies heartbeat administration on Lustre clusters.
+It offers the following operations:
+.TP
+.B status
+Print a single line status message consisting of the heartbeat resource
+status, a hyphen, and the lustre status.
+Under normal circumstances, server status should be \fIlocal-local\fR.
+.TP
+.B failover \fIreason ...\fR
+Initiate failover of local services (and foreign if active) to the
+failover partner.
+The command blocks until the transition is complete, which includes
+starting lustre on the partner node.
+Initiation and completion of failover is logged to the syslog
+\fIuser.err\fR facility.
+.TP
+.B failback \fIreason ...\fR
+Initiate failback of the local services from the failover
+partner. The command blocks until the transition is complete, which
+includes starting lustre.
+Initiation and completion of failback is logged to the syslog
+\fIuser.err\fR facility.
+.SH "HEARTBEAT STATUS VALUES"
+The heartbeat resource status values returned by \fBlhbadm status\fR
+may be one of the following:
+.TP
+.B local
+Heartbeat expects only the local services to be running.
+.TP
+.B none
+Heartbeat expects no services to be running.
+.TP
+.B all
+Heartbeat expects local and foreign services to be running.
+.TP
+.B foreign
+Heartbeat expects only the foreign services to be running.
+.TP
+.B transition
+Resources are in transition.
+.SH "LUSTRE STATUS VALUES"
+The lustre status values returned by \fBlhbadm status\fR
+may be one of the following:
+.TP
+.B loaded
+Lustre modules are loaded but that's about it.
+.TP
+.B recovery
+One or more Lustre services are in recovery.
+.TP
+.B unhealthy
+Lustre is not healthy.
+.TP
+.B none
+Lustre is not running any services.
+.TP
+.B local
+Lustre is running only the local services.
+.TP
+.B foreign
+Lustre is running only the foreign services.
+.TP
+.B all
+Lustre is running both the local and foreign services.
+.TP
+.B partial
+Lustre is partially started and may be running one or more services,
+but not exactly the local, foreign, or all sets.
+.SH SEE ALSO
+.BR cl_status (1)
+.BR hb_takeover (1)
+.BR hb_standby (1)
--- /dev/null
+.TH nids 5 Lustre nids /etc/nids
+.SH NAME
+/etc/nids \- The static lookup table for Lustre NIDs
+.SH DESCRIPTION
+The nids file maps host names to NIDs and vice-versa.
+.SH FORMAT
+Comments beginning with a hash (#) are ignored. Each line represents one
+host and includes the following information separated by white space:
+.TP
+.I "hostname"
+The primary hostname of the node, e.g. \fIuname -n\fR.
+.TP
+.I "primary nid"
+The primary NID of the node.
+.TP
+.I "other nid ..."
+Any additional NIDs.
+.SH EXAMPLE
+.nf
+## Tycho
+tycho-mds1 172.16.2.200@tcp 172.16.10.200@tcp
+tycho1 172.16.2.1@tcp 172.16.10.1@tcp
+tycho2 172.16.2.2@tcp 172.16.10.2@tcp
+tycho3 172.16.2.3@tcp 172.16.10.3@tcp
+tycho4 172.16.2.4@tcp 172.16.10.4@tcp
+tycho5 172.16.2.5@tcp 172.16.10.5@tcp
+tycho6 172.16.2.6@tcp 172.16.10.6@tcp
+tycho7 172.16.2.7@tcp 172.16.10.7@tcp
+tycho8 172.16.2.8@tcp 172.16.10.8@tcp
+.fi
+
+.SH FILES
+/etc/nids
/lc_net
/lustre_config
/lustre_createcsv
+/lustre
/lustre_start
/tree_status.pl
--- /dev/null
+#!/bin/bash
+#
+# Lustre - Heartbeat R1 Resource Agent for the Lustre file system
+#
+# Usage: Lustre <resource-name> start|stop|status
+# where <resource-name> has the form "<hostname>-targets"
+#
+
+# warn MESSAGE...
+# Log a diagnostic message with tag "heartbeat", prefix "Lustre: ".
+# Prefer the heartbeat logger when logd is configured, then the system
+# logger (checking both common paths), falling back to plain echo.
+warn ()
+{
+	if [ -e /etc/logd.cf ] && [ -x /usr/sbin/ha_logger ]; then
+		/usr/sbin/ha_logger -t heartbeat "Lustre: $*"
+	elif [ -x /usr/bin/logger ]; then
+		/usr/bin/logger -t heartbeat "Lustre: $*"
+	elif [ -x /bin/logger ]; then
+		/bin/logger -t heartbeat "Lustre: $*"
+	else
+		echo "Lustre: $*"
+	fi
+}
+
+# die MESSAGE...
+# Log MESSAGE via warn() and terminate the resource agent with status 1.
+die ()
+{
+	warn "$*"
+	exit 1
+}
+
+
+if [ $# != 2 ]; then
+ die "wrong number of arguments: $*"
+fi
+if ! [ "$2" == "start" -o "$2" == "stop" -o "$2" == "status" ]; then
+ die "bad action arg[2]: $*"
+fi
+
+if ! [ -x /usr/sbin/ldev ]; then
+ die "/usr/sbin/ldev is missing or not executable"
+fi
+if ! [ -x /etc/init.d/lustre ]; then
+ die "/etc/init.d/lustre is missing or not executable"
+fi
+
+action=$2
+if [ "`uname -n`-targets" == "$1" ]; then
+ service=local
+elif [ "`/usr/sbin/ldev -p`-targets" == "$1" ]; then
+ service=foreign
+else
+ die: "bad service arg[1]: $*"
+fi
+
+# Until multi-mount protect is implemented for ZFS we allow heartbeat to
+# force import a pool. This is required because ZFS will not allow you to
+# import a pool on a new host unless you have cleanly exported it.
+export ZPOOL_IMPORT_ARGS='-f'
+
+# N.B. If status action reports "running", this must pass through to
+# heartbeat unmodified. Otherwise, stdout/stderr is discarded by heartbeat,
+# so if we want to log diagnostic output from init scripts, we have to
+# redirect it here.
+
+warn /etc/init.d/lustre $action $service
+
+tmpout=`mktemp` || die "mktemp failed"
+/etc/init.d/lustre $action $service >$tmpout
+result=$?
+cat $tmpout | while read line; do
+ echo "$line"
+ warn "$line"
+done
+rm -f $tmpout
+
+exit $result
# Lustre is a trademark of Sun Microsystems, Inc.
#
-sbinscripts = lc_servip lustre_up14 lustre_rmmod
+sbinscripts = lc_servip lustre_up14 lustre_rmmod lhbadm ldev
# These are scripts that are generated from .in files
genscripts = lustre_config lc_modprobe lc_net lc_hb lc_cluman lustre_createcsv \
- lc_md lc_lvm lustre_start
+ lc_md lc_lvm lustre_start lustre
+
+initdir = $(sysconfdir)/init.d
+init_SCRIPTS = lustre lnet
+
+hadir = $(sysconfdir)/ha.d/resource.d
+ha_SCRIPTS = Lustre
sbin_SCRIPTS = $(genscripts) $(sbinscripts)
bin_SCRIPTS = lustre_req_history lfs_migrate
EXTRA_DIST = license-status maketags.sh version_tag.pl version_tag-git.pl \
version_tag-cvs.pl version_tag-none.pl lc_common \
$(addsuffix .in,$(genscripts)) lc_mon $(sbinscripts) \
- $(bin_SCRIPTS) make_META.pl
+ $(bin_SCRIPTS) make_META.pl lustre.in lnet lhbadm \
+ haconfig ldev Lustre
scriptlibdir = @libexecdir@/@PACKAGE@
+scriptlib_SCRIPTS = haconfig
scriptlib_DATA = lc_common
CLEANFILES = $(genscripts)
--- /dev/null
+#!/bin/bash
+
+# haconfig - config helper to process heartbeat V1 config skel files
+
+# Local node name for @LOCAL@ substitution.
+local=`uname -n`
+
+# Without ldev (or a configured failover partner) there is nothing to do;
+# exit successfully so callers (e.g. package scripts) are not disturbed.
+[ -x /usr/sbin/ldev ] || exit 0
+foreign=`/usr/sbin/ldev -p`
+[ -n "$foreign" ] || exit 0
+
+
+# Generated config files must be world-readable.
+umask 022
+
+# Instantiate each heartbeat config from its .in skeleton, replacing the
+# @LOCAL@/@FOREIGN@ placeholders with the two hostnames.
+for file in /etc/ha.d/haresources /etc/ha.d/ha.cf; do
+	if [ -r ${file}.in ]; then
+		sed -e "s!@LOCAL@!$local!g" -e "s!@FOREIGN@!$foreign!g" \
+		    < ${file}.in >${file}
+	fi
+done
+
+exit 0
--- /dev/null
+#!/usr/bin/perl
+#
+# ldev - parser for /etc/ldev.conf
+#
+use strict;
+use File::Basename;
+use Getopt::Long qw/ :config posix_default no_ignore_case/;
+
+$ENV{PATH} = "/sbin:/usr/sbin:/bin:/usr/bin";
+
+my $prog = basename($0);
+
+my $usage = <<EOF;
+Usage: $prog [OPTIONS]...
+
+Parse ldev.conf and answer the following queries:
+
+ -h, --help Display this help.
+ -c, --config FILE Set path to config file.
+ -H, --hostname NAME Use NAME instead of local hostname for queries.
+ -p, --partner Print hostname of failover partner.
+ -l, --local Print labels for local devices.
+ -f, --foreign Print labels for foreign devices.
+ -a, --all Print labels for local and foreign devices.
+ -s, --sanity Sanity check config on this node.
+ -d, --device LABEL Print storage device of LABEL.
+ -j, --journal LABEL Print journal device of LABEL if it exists.
+ -r, --raidtab LABEL Print raidtab of LABEL if it exists.
+ -t, --type LABEL Print device type of LABEL, i.e. "zfs" or "md".
+ -z, --zpool LABEL Print zpool containing LABEL.
+ CMD [ARGS] ... Run CMD in parallel for each device substituting:
+ %f=fsname %d=device %i=dec-index %n=main-nid %l=label
+ %t=srvtype %j=journal %I=hex-index %N=fail-nid
+ May be used in combination with -l, -f, -a options.
+EOF
+
+my %eparse = (
+ elabel_uniq => "label used more than once",
+ epairwise => "local and foreign host not mapped to each other",
+ efieldcount => "line has less than the minimum number of fields (4)",
+ ekeyval => "malformed id=name",
+);
+
+# Global configuration/state shared by all subroutines: command line
+# options plus the maps built from /etc/ldev.conf (and /etc/nids).
+my %conf = ();
+
+#
+# Main
+#
+
+parse_cmdline ();
+
+parse_config ();
+
+# Exactly one query/action runs per invocation; sanity and exec_cmd exit
+# themselves, so at most one of the remaining query_* prints output.
+sanity () if $conf{sanity};
+exec_cmd () if $conf{execcmd};
+query_partner () if $conf{partner};
+query_local () if $conf{local};
+query_foreign () if $conf{foreign};
+query_all () if $conf{all};
+query_device () if $conf{device};
+query_journal () if $conf{journal};
+query_raidtab () if $conf{raidtab};
+query_type () if $conf{type};
+query_zpool () if $conf{zpool};
+
+exit(0);
+
+#
+# Subroutines
+#
+
+# parse_cmdline
+# Populate %conf defaults, parse command line options into %conf, and
+# treat any remaining arguments as a command to run per-device (execcmd).
+# Exits via usage() on -h or bad options, log_fatal if the config file
+# is unreadable.  Loads /etc/nids only when the command uses %n or %N.
+sub parse_cmdline
+{
+	my $help = 0;
+	my $host = "";	# NOTE(review): appears unused -- confirm before removing
+
+	$conf{partner} = 0;
+	$conf{all} = 0;
+	$conf{local} = 0;
+	$conf{foreign} = 0;
+	$conf{config} = "/etc/ldev.conf";
+	$conf{nidsfile} = "/etc/nids";
+	$conf{hostname} = `uname -n`; chomp $conf{hostname};
+	$conf{device} = "";
+	$conf{sanity} = 0;
+	$conf{execcmd} = "";
+	$conf{journal} = "";
+
+	my $rc = GetOptions (
+		"help|h!" => \$help,
+		"partner|p!" => \$conf{partner},
+		"all|a!" => \$conf{all},
+		"local|l!" => \$conf{local},
+		"foreign|f!" => \$conf{foreign},
+		"config|c=s" => \$conf{config},
+		"nidsfile|n=s" => \$conf{nidsfile},
+		"hostname|H=s" => \$conf{hostname},
+		"sanity|s!" => \$conf{sanity},
+		"device|d=s" => \$conf{device},
+		"journal|j=s" => \$conf{journal},
+		"raidtab|r=s" => \$conf{raidtab},
+		"type|t=s" => \$conf{type},
+		"zpool|z=s" => \$conf{zpool},
+	);
+
+	usage() if $help || !$rc;
+
+	log_fatal ("cannot read config file\n") if (! -r $conf{config});
+
+	# Leading space lets exec_cmd distinguish "command given" from "".
+	if (@ARGV) {
+		$conf{execcmd} = " " . join " ", @ARGV;
+	}
+
+	parse_nids () if ($conf{execcmd} =~ /(%n|%N)/);
+}
+
+# parse_config
+# Parse $conf{config} (ldev.conf).  Each non-comment line is:
+#   local foreign label [type:]device [journal] [raidtab]
+# Builds the label->{dev,local,journal,raidtab,type,zpool} maps, the
+# local<->foreign host map (l2f), and the local/foreign label lists for
+# $conf{hostname}.  Exits via eparse_* helpers on malformed input.
+sub parse_config
+{
+	my $line = 0;
+	my %l2f = ();
+	my %label2local = ();
+	my %label2dev = ();
+	my %label2journal = ();
+	my %label2raidtab = ();
+	my %label2type = ();
+	my %label2zpool = ();
+	my @local_labels = ();
+	my @foreign_labels = ();
+
+	open (CONF, "< $conf{config}") or log_fatal ("$conf{config}: $!\n");
+
+	while (<CONF>) {
+		my $type;
+		$line++;
+		s/#.*//;	# strip comments and trailing whitespace
+		s/(\s)*$//;
+		next if (/^(\s)*$/);
+		chomp;
+		my ($local, $foreign, $label, $dev, $j, $raidtab) = split;
+		# A "type:" prefix (e.g. md: or zfs:) is only recognized when
+		# the device field is not an absolute path.
+		if ($dev !~ /^\// && $dev =~ /^([^:]+):(.+)$/) {
+			$type = $1;
+			$dev = $2;
+		}
+		eparse_line ($line, "efieldcount") if (!defined $dev);
+		# Every line for a given local host must name the same partner.
+		eparse_line ($line, "epairwise") if (exists $l2f{$local}
+						  && $l2f{$local} ne $foreign);
+		$l2f{$local} = $foreign;
+
+		eparse_line ($line, "elabel_uniq") if (exists $label2dev{$label}
+					    || exists $label2local{$label});
+		$label2dev{$label} = $dev;
+		$label2local{$label} = $local;
+		$label2journal{$label} = $j if defined $j;
+		$label2raidtab{$label} = $raidtab if defined $raidtab;
+		if (defined $type) {
+			$label2type{$label} = $type;
+			# For zfs, "pool/dataset" also yields the pool name.
+			if ($type eq "zfs" && $dev =~ m{^([^/]+)/[^/]+$}) {
+				$label2zpool{$label} = $1;
+			}
+		}
+
+		if ($local eq $conf{hostname}) {
+			push @local_labels, $label;
+		} elsif ($foreign eq $conf{hostname}) {
+			push @foreign_labels, $label;
+		}
+	}
+	close CONF;
+
+	# Verify the failover mapping is symmetric ("-" means no partner).
+	foreach (keys %l2f) {
+		my $foreign = $l2f{$_};
+		next if ($foreign eq "-");
+		eparse_str ($_, "epairwise")
+		    unless (!exists $l2f{$foreign} or $l2f{$foreign} eq $_);
+	}
+
+	@{$conf{local_labels}} = @local_labels;
+	@{$conf{foreign_labels}} = @foreign_labels;
+	%{$conf{l2f}} = %l2f;
+	%{$conf{label2dev}} = %label2dev;
+	%{$conf{label2local}} = %label2local;
+	%{$conf{label2journal}} = %label2journal;
+	%{$conf{label2raidtab}} = %label2raidtab;
+	%{$conf{label2type}} = %label2type;
+	%{$conf{label2zpool}} = %label2zpool;
+}
+
+# parse_nids
+# Parse $conf{nidsfile} (/etc/nids): "hostname primary-nid [more nids...]".
+# Builds host->primary-nid and nid->host maps (every listed nid maps back
+# to the host).  Exits via log_fatal on an unreadable file or a line
+# missing the nid field.
+sub parse_nids ()
+{
+	my $line = 0;
+	my %host2nid = ();
+	my %nid2host = ();
+
+	open (NIDS, "< $conf{nidsfile}") or log_fatal ("$conf{nidsfile}: $!\n");
+
+	while (<NIDS>) {
+		$line++;
+		s/#.*//;
+		next if (/^(\s)*$/);
+		chomp;
+		my ($host, $nid, $morenids) = split (/\s+/, $_, 3);
+		if (!defined $nid) {
+			log_fatal ("$conf{nidsfile} line $line: incomplete line\n");
+		}
+		$host2nid{$host} = $nid;
+		$nid2host{$nid} = $host;
+		# $morenids may be undef (split on undef yields no elements).
+		map { $nid2host{$_} = $host; } split (/\s+/, $morenids);
+	}
+	close NIDS;
+
+	%{$conf{host2nid}} = %host2nid;
+	%{$conf{nid2host}} = %nid2host;
+}
+
+# query_partner
+# Print the failover partner of $conf{hostname}, if one is configured
+# ("-" in ldev.conf means no partner and prints nothing).
+sub query_partner
+{
+	my %l2f = %{$conf{l2f}};
+	my $hostname = $conf{hostname};
+	if (exists $l2f{$hostname} && $l2f{$hostname} ne "-") {
+		print "$l2f{$hostname}\n";
+	}
+}
+
+# query_local - print one line per label normally served by this host.
+sub query_local
+{
+	map { print "$_\n"; } @{$conf{local_labels}};
+}
+
+# query_foreign - print one line per label served here only on failover.
+sub query_foreign
+{
+	map { print "$_\n"; } @{$conf{foreign_labels}};
+}
+
+# query_all - print local labels followed by foreign labels.
+sub query_all
+{
+	query_local ();
+	query_foreign ();
+}
+
+# query_device - print the storage device path of label $conf{device}.
+sub query_device
+{
+	my %label2dev = %{$conf{label2dev}};
+
+	if (exists $label2dev{$conf{device}}) {
+		print "$label2dev{$conf{device}}\n";
+	}
+}
+
+# query_raidtab - print the raidtab/cachefile path of label $conf{raidtab}.
+sub query_raidtab
+{
+	my %label2raidtab = %{$conf{label2raidtab}};
+
+	if (exists $label2raidtab{$conf{raidtab}}) {
+		print "$label2raidtab{$conf{raidtab}}\n";
+	}
+}
+
+# query_journal - print the journal device of label $conf{journal};
+# a "-" placeholder in the config prints nothing.
+sub query_journal
+{
+	my %label2journal = %{$conf{label2journal}};
+
+	if (exists $label2journal{$conf{journal}} &&
+	    $label2journal{$conf{journal}} ne "-") {
+		print "$label2journal{$conf{journal}}\n";
+	}
+}
+
+# query_type - print the device type ("zfs" or "md") of label $conf{type}.
+sub query_type
+{
+	my %label2type = %{$conf{label2type}};
+
+	if (exists $label2type{$conf{type}}) {
+		print "$label2type{$conf{type}}\n";
+	}
+}
+
+# query_zpool - print the zpool containing label $conf{zpool} (zfs only).
+sub query_zpool
+{
+	my %label2zpool = %{$conf{label2zpool}};
+
+	if (exists $label2zpool{$conf{zpool}}) {
+		print "$label2zpool{$conf{zpool}}\n";
+	}
+}
+
+# dd_test DEVPATH
+# Smoke-test a block device by reading its first ~10MB (capped at the
+# device size) with dd.  Uses blockdev(8) to get the sector size and
+# size in 512-byte sectors.  Returns 1 on a successful read, 0 otherwise
+# (including when blockdev fails, e.g. DEVPATH is not a block device).
+sub dd_test
+{
+	my ($dpath) = @_;
+	my $retval = 0;
+	my $bs = `blockdev --getss $dpath 2>/dev/null`; chomp $bs;
+	my $max512 = `blockdev --getsize $dpath 2>/dev/null`; chomp $max512;
+	if ($? == 0 && $bs > 0 && $max512 > 0) {
+		my $maxb = ($max512 / $bs) * 512;	# device size in $bs blocks
+		my $count = 10 * 1024 * 1024 / $bs;	# read first 10mb
+		my $dev = `readlink -f $dpath`; chomp $dev;
+		$count = $maxb if ($count > $maxb);
+		`dd if=$dev of=/dev/null bs=$bs count=$count >/dev/null 2>&1`;
+		$retval = ($? == 0);
+	}
+	return $retval;
+}
+
+# sanity
+# Check every local and foreign label configured for this node: the
+# /dev/disk/by-label symlink and the configured device must exist, be
+# readable (dd_test), and resolve to the same underlying device; any
+# configured journal device must also exist and be readable.  Prints one
+# error per problem and exits 1 if any check failed, 0 otherwise.
+sub sanity
+{
+	my $exit_val = 0;
+
+	my @local_labels = @{$conf{local_labels}};
+	my @foreign_labels = @{$conf{foreign_labels}};
+	my %label2dev = %{$conf{label2dev}};
+	my %label2journal = %{$conf{label2journal}};
+
+	foreach (@local_labels, @foreign_labels) {
+		my $lpath = "/dev/disk/by-label/$_";
+		my $dpath = $label2dev{$_};
+		my $jpath = $label2journal{$_};
+		my $label = $_;
+		if (! -e $lpath) {
+			log_error ("$lpath does not exist\n");
+			$exit_val = 1;
+		}
+		if (! -e $dpath) {
+			log_error ("$dpath does not exist\n");
+			$exit_val = 1;
+		} elsif (!dd_test ($dpath)) {
+			log_error ("$dpath failed dd test\n");
+			$exit_val = 1;
+		}
+		# by-label symlink and configured device must be the same node
+		if (`readlink -f $lpath` ne `readlink -f $dpath`) {
+			log_error ("$lpath and $dpath point to different things\n");
+			$exit_val = 1;
+		}
+		# NOTE(review): a "-" journal placeholder is truthy and would be
+		# checked as a path here -- confirm configs always omit it instead.
+		if ($jpath) {
+			if (! -e $jpath) {
+				log_error ("$jpath (journal for $label) does not exist\n");
+				$exit_val = 1;
+			} elsif (!dd_test ($jpath)) {
+				log_error ("$jpath failed dd test\n");
+				$exit_val = 1;
+			}
+		}
+	}
+	exit($exit_val);
+}
+
+# par_exec LIST
+# Each LIST element is "LABEL CMD ARGS...".  Fork one child per element;
+# each child runs its command with output piped through sed so every line
+# is prefixed "LABEL: ".  A sentinel temp file serves as the error flag:
+# any failing command removes it.  Returns 1 if all commands succeeded
+# (sentinel intact), 0 otherwise.
+sub par_exec
+{
+	my @pids = ();
+	my %pid2label = ();
+	my %pid2cmd = ();
+	my $pid;
+	my $result = 0;
+
+	my $tmpfile = `mktemp \${TMPDIR:-/tmp}/ldev.XXXXXXXXXX`; chomp $tmpfile;
+	log_fatal ("failed to create $tmpfile\n") if (! -e $tmpfile);
+
+	foreach (@_) {
+		my ($label, $cmd) = split (/\s+/, $_, 2);
+		my ($basecmd) = split (/\s+/, $cmd);
+		if (($pid = fork)) { # parent
+			$pid2label{$pid} = $label;
+			$pid2cmd{$pid} = $basecmd;
+		} elsif (defined $pid) { # child
+			#print STDERR "$label: running $cmd\n";
+			exec "($cmd 2>&1 || rm -f $tmpfile) | sed -e 's/^/$label: /'";
+			# exec failed: report it, flag the error, and exit so the
+			# child cannot fall through and re-run the parent's loop.
+			print STDERR "$label: exec $basecmd: $!\n"; unlink $tmpfile;
+			exit (1);
+		} else { # error
+			# Fixed: interpolate $label (message previously printed the
+			# literal word "label"), and unlink the sentinel before
+			# log_fatal, which exits and made the old unlink unreachable.
+			unlink $tmpfile; log_fatal ("$label: fork: $!\n");
+		}
+	}
+	while (($pid = wait) != -1) {
+		#print STDERR "$pid2label{$pid}: completed\n";
+	}
+
+	# sentinel is intact, so there were no errors
+	if (-e $tmpfile) {
+		unlink $tmpfile;
+		$result = 1;
+	}
+
+	return $result;
+}
+
+# exec_cmd
+# Expand the %-substitutions in $conf{execcmd} once per selected label
+# (local by default; foreign with -f; both with -a) and run the resulting
+# commands in parallel via par_exec.  Exits 0 on success, fatal otherwise.
+sub exec_cmd
+{
+	my @labels = ();
+	my @cmds = ();
+	my %label2dev = %{$conf{label2dev}};
+	my %label2journal = %{$conf{label2journal}};
+	my %l2f = %{$conf{l2f}};
+	my ($nid, $failnid);
+
+	# %n and %N require nid mappings from /etc/nids (see parse_nids).
+	if ($conf{execcmd} =~ /%n/) {
+		my %host2nid = %{$conf{host2nid}};
+		if (!defined $host2nid{$conf{hostname}}) {
+			log_fatal ("%n used but no nid defined for this host\n");
+		}
+		$nid = $host2nid{$conf{hostname}};
+	}
+	if ($conf{execcmd} =~ /%N/) {
+		if (!defined $l2f{$conf{hostname}}) {
+			log_fatal ("%N used but foreign host is undefined\n");
+		}
+		my %host2nid = %{$conf{host2nid}};
+		if (!defined $host2nid{$l2f{$conf{hostname}}}) {
+			log_fatal ("%N used but foreign nid is undefined\n");
+		}
+		$failnid = $host2nid{$l2f{$conf{hostname}}};
+	}
+
+	# Select the label set: -f alone => foreign, default => local,
+	# otherwise (-a, or -f with -l) => both.
+	if ($conf{foreign} and !$conf{local} and !$conf{all}) {
+		@labels = @{$conf{foreign_labels}};
+	} elsif (!$conf{foreign} and !$conf{all}) {
+		@labels = @{$conf{local_labels}};
+	} else {
+		@labels = (@{$conf{local_labels}}, @{$conf{foreign_labels}});
+	}
+	foreach (@labels) {
+		# NOTE(review): the match result is not checked; a label that
+		# does not look like fsname-OST/MDT/MGTnnnn (e.g. an MGS label)
+		# leaves $1..$3 stale from a previous iteration -- confirm
+		# whether such labels can appear here.
+		/(\w+)-(OST|MDT|MGT)([0-9a-fA-F]{4})/;
+
+		my $fsname = $1;
+		my $type = $2; $type =~ tr/A-Z/a-z/;
+		my $hexindex = $3;
+		my $decindex = hex($3);
+		my $label = $_;
+		my $cmd = $conf{execcmd};
+		my $device = $label2dev{$_};
+		if ($conf{execcmd} =~ /%j/ && !defined $label2journal{$_}) {
+			log_fatal ("%j used but no journal defined for $_\n");
+		}
+		my $journal = $label2journal{$_};
+
+		$cmd =~ s/%f/$fsname/g;  # %f = fsname
+		$cmd =~ s/%t/$type/g;    # %t = server type
+		$cmd =~ s/%I/$hexindex/g;# %I = index (hex)
+		$cmd =~ s/%i/$decindex/g;# %i = index (dec)
+		$cmd =~ s/%l/$label/g;   # %l = label
+		$cmd =~ s/%d/$device/g;  # %d = device
+		$cmd =~ s/%j/$journal/g; # %j = journal device
+		$cmd =~ s/%n/$nid/g;     # %n = nid
+		$cmd =~ s/%N/$failnid/g; # %N = failnid
+
+		push @cmds, "$_ $cmd";
+	}
+
+	par_exec (@cmds) or log_fatal ("parallel command execution failed\n");
+	exit 0;
+}
+
+# usage - print the usage string to stderr and exit 0.
+sub usage
+{
+	print STDERR "$usage";
+	exit 0;
+}
+
+# Logging helpers: all messages go to stderr prefixed with the program
+# name; log_fatal and the eparse_* config-error helpers exit with 1.
+sub log_msg { print STDERR "$prog: ", @_; }
+sub log_error { log_msg ("Error: ", @_) }
+sub log_fatal { log_msg ("Fatal: ", @_); exit 1; }
+sub eparse_line { log_fatal ("$conf{config} line $_[0]: $eparse{$_[1]}\n"); }
+sub eparse_str { log_fatal ("$conf{config}: $_[0]: $eparse{$_[1]}\n"); }
--- /dev/null
+#!/bin/bash
+
+# lhbadm - handle some common heartbeat/lustre failover ops
+
+PATH=/sbin:/usr/sbin:/usr/bin:$PATH:/usr/lib64/heartbeat:/usr/lib/heartbeat
+
+declare -r prog=lhbadm
+
+# die MESSAGE...
+# Print MESSAGE prefixed with the program name and exit with status 1.
+die ()
+{
+	echo "$prog: $@"
+	exit 1
+}
+
+# warn MESSAGE...
+# Print MESSAGE prefixed with the program name (does not exit).
+warn ()
+{
+	echo "$prog: $@"
+}
+
+# usage
+# Print a usage summary and exit 1.
+# NOTE(review): the synopsis lists "lstatus" but no description or
+# lstatus() function exists -- confirm intent.
+usage ()
+{
+	echo "Usage: $prog status|lstatus|failback|failover"
+	echo "  status   - print one-line heartbeat-lustre status"
+	echo "  failover - fail all my active resources over to partner"
+	echo "  failback - fail my normal resources back"
+	exit 1
+}
+
+# test_mounts
+# Count how many of this node's local (ldev -l) and foreign (ldev -f)
+# labels the lustre init script reports as "running", and print one of:
+# none, local, foreign, all, or partial (some but not a complete set).
+test_mounts ()
+{
+	local label
+	local lcount=0
+	local fcount=0
+	local ltot=0
+	local ftot=0
+
+	for label in $(ldev -l); do
+		ltot=$((ltot + 1))
+		if [ "$(service lustre status $label)" == "running" ]; then
+			lcount=$((lcount + 1))
+		fi
+	done
+	for label in $(ldev -f); do
+		ftot=$((ftot+ 1))
+		if [ "$(service lustre status $label)" == "running" ]; then
+			fcount=$((fcount + 1))
+		fi
+	done
+
+	if [ $(($lcount + $fcount)) == 0 ]; then
+		echo none
+	elif [ $lcount == $ltot -a $fcount == 0 ]; then
+		echo local
+	elif [ $lcount == 0 -a $fcount == $ftot ]; then
+		echo foreign
+	elif [ $lcount == $ltot -a $fcount == $ftot ]; then
+		echo all
+	else
+		echo partial
+	fi
+}
+
+status ()
+{
+ local rstat fstat
+ local labels
+
+ rstat=$(cl_status rscstatus) || die "cl_status rscstatus failed"
+ fstat=$(service lustre status)
+
+ if [ "$fstat" == "running" ]; then
+ fstat=$(test_mounts)
+ fi
+
+ echo $rstat-$fstat
+}
+
+# wait_for_transition
+# Poll cl_status every 5 seconds until heartbeat leaves the "transition"
+# resource state.  Dies if cl_status itself fails.
+wait_for_transition ()
+{
+	while sleep 5; do
+		state=$(cl_status rscstatus) || die "cl_status rscstatus failed"
+		[ "$state" == "transition" ] || break
+	done
+}
+
+failover ()
+{
+ local s
+
+ [ "$(id -un)" == "root" ] || die "failover requires root privileges"
+ [ $# -gt 0 ] || die "please include a descriptive reason for the logs"
+
+ s=$(status)
+ logger -s -t Lustre-ha -p user.err "failover start, status=$s, reason: $*"
+
+ hb_standby all 2>/dev/null 1>&2 || die "hb_standby all failed"
+ wait_for_transition
+
+ s=$(status)
+ logger -s -t Lustre-ha -p user.err "failover complete, status=$s"
+}
+
+failback ()
+{
+ local s
+
+ [ "$(id -un)" == "root" ] || die "failback requires root privileges"
+ [ $# -gt 0 ] || die "please include a descriptive reason for the logs"
+
+ s=$(status)
+ logger -s -t Lustre-ha -p user.err "failback start, status=$s, reason: $*"
+
+ hb_takeover local || die "hb_takeover local failed"
+ wait_for_transition
+
+ s=$(status)
+ logger -s -t Lustre-ha -p user.err "failback complete, status=$s"
+}
+
+
+#
+# MAIN
+#
+
+[ $# == 0 ] && usage
+[ -x /usr/bin/cl_status ] || die "Heartbeat is not installed"
+hstat=$(cl_status hbstatus) || die "$hstat"
+
+case "$1" in
+ status) status ;;
+ lstatus) status ;; # no lstatus function is defined in this script; alias to status
+ failback) shift; failback $*;;
+ failover) shift; failover $*;;
+ *) usage ;;
+esac
+
+# vi: ts=4 sw=4 expandtab
--- /dev/null
+#!/bin/bash
+#
+# lnet This shell script takes care of starting and stopping
+# the lnet (Lustre networking) services.
+#
+# chkconfig: - 59 76
+# description: Part of the lustre file system.
+# probe: true
+# config: /etc/sysconfig/lustre
+
+# Source function library.
+[ -f /etc/rc.d/init.d/functions ] && . /etc/rc.d/init.d/functions
+
+# Source networking configuration and check that networking is up.
+[ -f /etc/sysconfig/network ] && . /etc/sysconfig/network && \
+[ "${NETWORKING}" = "no" ] && exit 0
+
+# Check for and source configuration file otherwise set defaults
+[ -f /etc/sysconfig/lnet ] && . /etc/sysconfig/lnet
+
+declare -r TOP_MODULES=( \
+ obdecho \
+ llite \
+ lustre \
+ osc \
+ lov \
+ mds \
+ mdc \
+ mgs \
+ mgc \
+ ost \
+ obdfilter \
+ lquota \
+ ptlrpc \
+)
+declare -r BOTTOM_MODULES=( \
+ ksocklnd \
+ kqswlnd \
+ ko2iblnd \
+ fsfilt_ldiskfs \
+ obdclass \
+ lnet \
+ lvfs \
+ libcfs \
+ ldiskfs \
+)
+
+declare -r awkprog='BEGIN { rc = -1 }
+ { if ( $1 == module_name ) { rc = $3; exit; } }
+ END { print rc }'
+
+# Usage: run_preexec_check [ start | restart | condrestart ]
+# The single parameter will be passed to the PREEXEC_SCRIPT
+run_preexec_check ()
+{
+ if [ -n "$PREEXEC_CHECK" ] && ! $PREEXEC_CHECK ; then
+ echo "Pre-exec check \"$PREEXEC_CHECK\" failed. Aborting."
+ exit 1
+ fi
+
+ if [ -n "$PREEXEC_SCRIPT" ] && ! "$PREEXEC_SCRIPT" "$1" ; then
+ echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed. Aborting."
+ exit 1
+ fi
+}
+
+# Usage: run_postexec_check [ start | restart | condrestart ]
+# The single parameter will be passed to the POSTEXEC_SCRIPT
+run_postexec_check ()
+{
+ if [ -n "$POSTEXEC_CHECK" ] && ! $POSTEXEC_CHECK ; then
+ echo "Post-exec check \"$POSTEXEC_CHECK\" failed. Aborting."
+ exit 1
+ fi
+
+ if [ -n "$POSTEXEC_SCRIPT" ] && ! "$POSTEXEC_SCRIPT" "$1" ; then
+ echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed. Aborting."
+ exit 1
+ fi
+}
+
+remove_modules ()
+{
+ local modules="$*"
+ local mod ref_cnt
+
+ for mod in $modules; do
+ ref_cnt=`/sbin/lsmod | awk "$awkprog" "module_name=$mod"`
+ if [ "$ref_cnt" -lt 0 ]; then
+ # module not loaded, skip it
+ continue
+ fi
+ if [ "$ref_cnt" -gt 0 ]; then
+ # module in use. maybe it just needs a few seconds
+ # after removal of previous modules.
+ sleep 5
+ ref_cnt=`/sbin/lsmod | awk "$awkprog" "module_name=$mod"`
+ fi
+ if [ "$ref_cnt" -eq 0 ]; then
+ # unload the module
+ echo "Removing module $mod"
+ /sbin/rmmod $mod
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Failed to remove module $mod."
+ return 1
+ fi
+ else
+ # boo! module still in use.
+ echo "ERROR: Module $mod has non-zero reference count."
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+stop_lnet ()
+{
+ local errmsg=`/usr/sbin/lctl network unconfigure 2>&1`
+ if [ $? -gt 0 ]; then
+ # The following error message means that lnet is already
+ # unconfigured, and the modules are not loaded.
+ echo $errmsg | grep "LNET unconfigure error 19" > /dev/null
+ if [ $? -gt 0 ]; then
+ return 0
+ else
+ echo "$errmsg"
+ return 1
+ fi
+ fi
+ return 0
+}
+
+status ()
+{
+ old_nullglob="`shopt -p nullglob`" # save nullglob state so it can be restored before returning
+ shopt -u nullglob
+
+ STATE="stopped"
+ # LSB compliance - return 3 if service is not running
+ # Lustre-specific returns
+ # 150 - partial startup
+ # 151 - health_check unhealthy (NOTE(review): code below sets RETVAL=1 — confirm intended value)
+ # 152 - LBUG
+ RETVAL=3
+ egrep -q "lnet" /proc/modules && STATE="loaded"
+
+ # check for any routes - on a portals router this is the only thing
+ [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
+
+ # check if this is a router
+ if [ -d /proc/sys/lnet ]; then
+ ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`"
+ if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
+ STATE="running"
+ RETVAL=0
+ fi
+ fi
+
+ # check for error in health_check
+ HEALTH="/proc/fs/lustre/health_check"
+ [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=1
+
+ # check for LBUG
+ [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152
+
+ echo $STATE
+ eval $old_nullglob # restore the caller's nullglob setting
+}
+
+# See how we were called.
+case "$1" in
+ start)
+ run_preexec_check "start"
+ modprobe lnet || exit 1
+ lctl network up || exit 1
+ touch /var/lock/subsys/lnet # mark started only after lnet is actually up
+ run_postexec_check "start"
+ ;;
+ stop)
+ run_preexec_check "stop"
+ remove_modules ${TOP_MODULES[*]} || exit 1
+ stop_lnet || exit 1
+ remove_modules ${BOTTOM_MODULES[*]} || exit 1
+ rm -f /var/lock/subsys/lnet
+ run_postexec_check "stop"
+ ;;
+ status)
+ status
+ ;;
+ restart)
+ $0 stop
+ $0 start
+ ;;
+ reload)
+ touch /var/lock/subsys/lnet
+ ;;
+ probe)
+ if [ ! -f /var/lock/subsys/lnet ] ; then
+ echo $"start"; exit 0
+ fi
+ ;;
+ condrestart)
+ [ -f /var/lock/subsys/lnet ] && {
+ $0 stop
+ $0 start
+ }
+ ;;
+ *)
+ echo $"Usage: lnet {start|stop|status|restart|reload|probe|condrestart}"
+ exit 1
+esac
+
+exit 0
+++ /dev/null
-#!/bin/sh
-#
-# lustre This shell script takes care of starting and stopping Lustre
-#
-# chkconfig: - 99 1
-# description: Lustre Lite network File System.
-# This starts both Lustre client and server functions.
-# processname: lconf
-# config: /etc/lustre/config.xml
-# pidfile: /var/run/lustre.pid
-### BEGIN INIT INFO
-# Provides: lustre
-# Required-Start: $network +sshd
-# Required-Stop: $network
-# Should-Start:
-# Should-Stop:
-# Default-Start:
-# Default-Stop: 0 1 2 3 4 5 6
-# Short-Description: Lustre Lite network File System.
-# Description: This starts both Lustre client and server functions.
-### END INIT INFO
-
-
-SERVICE=${0##*/}
-
-: ${LUSTRE_CFG:=/etc/lustre/lustre.cfg}
-[ -f ${LUSTRE_CFG} ] && . ${LUSTRE_CFG}
-[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
-
-: ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}
-: ${LCONF:=lconf}
-: ${LCTL:=lctl}
-# Some distros use modprobe.conf.local
-if [ -f /etc/modprobe.conf.local ]; then
- : ${MODPROBE_CONF:=/etc/modprobe.conf.local}
-else
- : ${MODPROBE_CONF:=/etc/modprobe.conf}
-fi
-# Be sure the proper directories are in PATH.
-export PATH="/sbin:$PATH"
-
-case "$SERVICE" in
- [SK][[:digit:]][[:digit:]]lustre | lustre)
- SERVICE="lustre"
- : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
- : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
- ;;
- *)
- : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"}
- : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"}
- ;;
-esac
-LOCK=/var/lock/subsys/$SERVICE
-
-# Source function library.
-if [ -f /etc/init.d/functions ] ; then
- . /etc/init.d/functions
-fi
-
-# Source networking configuration.
-if [ -f /etc/sysconfig/network ] ; then
- . /etc/sysconfig/network
-fi
-
-check_start_stop() {
- # Exit codes now LSB compliant
- # Check that networking is up. - exit 'not running'
- [ "${NETWORKING}" = "no" ] && exit 7
-
- # exit 'not installed'
- [ -x ${LCONF} -a -x ${LCTL} ] || exit 5
-
- if [ ${LUSTRE_CONFIG_XML:0:1} = "/" ] ; then
- if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then
- echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping."
- # exit 'not configured'
- exit 6
- fi
- fi
-
- # Create /var/lustre directory
- # This is used by snmp agent for checking lustre services
- # status online/offline/online pending/offline pending.
-
- [ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR
- STATUS=${STATUS_DIR}/sysStatus
-}
-
-start() {
- if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then
- if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then
- cat >&2 <<EOF
-This script was run directly, which can be dangerous if you are using
-clumanager to manage Lustre services.
-
-If you are not using clumanager for Lustre services, run the following
-command to have this script start Lustre instead:
-
-touch /etc/lustre/start-despite-clumanager
-EOF
- RETVAL=6 # program not configured
- return
- fi
- fi
- check_start_stop
- echo -n "Starting $SERVICE: "
- if [ $UID -ne 0 ]; then
- echo "Lustre should be started as root"
- RETVAL=4 # insufficent privileges
- return
- fi
- # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
- ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"`
- if [[ ! -z ${ROUTER} ]]; then
- modprobe lnet
- ${LCTL} network configure
- else
- ${LCONF} ${LCONF_START_ARGS}
- fi
- RETVAL=$?
- echo $SERVICE
- if [ $RETVAL -eq 0 ]; then
- touch $LOCK
- echo "online" >$STATUS
- else
- echo "online pending" >$STATUS
- fi
-}
-
-stop() {
- check_start_stop
- echo -n "Shutting down $SERVICE: "
- if [ $UID -ne 0 ]; then
- echo "Lustre should be stopped as root"
- RETVAL=4 # insufficent privileges
- return
- fi
- # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
- ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"`
- if [[ ! -z ${ROUTER} ]]; then
- MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet`
- if [[ ! -z ${MODULE_LOADED} ]]; then
- ${LCTL} network unconfigure
- fi
- ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
- # do it again, in case we tried to unload ksocklnd too early
- ${LCTL} modules | awk '{ print $2 }' | xargs rmmod
-
- else
- ${LCONF} ${LCONF_STOP_ARGS}
- fi
- RETVAL=$?
- echo $SERVICE
- rm -f $LOCK
- if [ $RETVAL -eq 0 ]; then
- echo "offline" >$STATUS
- else
- echo "offline pending" >$STATUS
- fi
-}
-
-restart() {
- stop
- start
-}
-
-status() {
- STATE="stopped"
- # LSB compliance - return 3 if service is not running
- # Lustre-specific returns
- # 150 - partial startup
- # 151 - health_check unhealthy
- # 152 - LBUG
- RETVAL=3
- egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
-
- # check for any routes - on a portals router this is the only thing
- [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
-
- # check for any configured devices (may indicate partial startup)
- if [ -d /proc/fs/lustre ]; then
- [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
-
- # check for either a server or a client filesystem
- MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
- OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
- LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
- [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
- else
- # check if this is a router
- if [ -d /proc/sys/lnet ]; then
- ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`"
- if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
- STATE="running"
- RETVAL=0
- fi
- fi
- fi
-
- # check for server disconnections
- DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
- [ "$DISCON" ] && STATE="disconnected" && RETVAL=0
-
- # check for servers in recovery
- [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery" && RETVAL=0
-
- # check for error in health_check
- HEALTH="/proc/fs/lustre/health_check"
- [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=151
-
- # check for LBUG
- [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152
-
- # If Lustre is up , check if the service really exists
- # Skip this is we are not checking a specific service
- if [ $RETVAL -eq 0 ] && [ $SERVICE != 'lustre' ]; then
- DUMMY=$( $LCTL dl | grep "$SERVICE")
- [ $? -ne 0 ] && STATE="not_found" && RETVAL=3
- fi
-
- echo $STATE
-}
-
-# See how we were called.
-case "$1" in
- start)
- start
- ;;
- stop)
- stop
- ;;
- restart)
- restart
- ;;
- status)
- status $SERVICE
- ;;
- *)
- echo "Usage: $SERVICE {start|stop|restart|status}"
- exit 1
-esac
-
-exit $RETVAL
--- /dev/null
+#!/bin/bash
+#
+# lustre This shell script takes care of starting and stopping
+# the lustre services.
+#
+# chkconfig: - 60 20
+# description: Part of the lustre file system.
+# probe: true
+# config: /etc/sysconfig/lustre
+
+# Source function library.
+. /etc/rc.d/init.d/functions
+
+# Source networking configuration.
+if [ ! -f /etc/sysconfig/network ]; then
+ exit 0
+fi
+
+. /etc/sysconfig/network
+
+LDEV=${LDEV:-"/usr/sbin/ldev"}
+ZPOOL_LAYOUT=/usr/bin/zpool_layout
+UDEVADM=${UDEVADM:-/sbin/udevadm}
+
+# Check that networking is up.
+[ "${NETWORKING}" = "no" ] && exit 0
+
+# Check for and source configuration file otherwise set defaults
+[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre
+FSCK_ARGS=${FSCK_ARGS:-""}
+MOUNT_OPTIONS=${MOUNT_OPTIONS:-""}
+LOCAL_SRV=${LOCAL_SRV:-"`$LDEV -l 2>/dev/null`"}
+FOREIGN_SRV=${FOREIGN_SRV:-"`$LDEV -f 2>/dev/null`"}
+REQUIRE_MMP_FEATURE=${REQUIRE_MMP_FEATURE:-${FOREIGN_SRV:+"yes"}}
+LOCAL_MOUNT_DIR=${LOCAL_MOUNT_DIR:-"/mnt/lustre/local"}
+FOREIGN_MOUNT_DIR=${FOREIGN_MOUNT_DIR:-"/mnt/lustre/foreign"}
+SETUP_DEVICES=${SETUP_DEVICES:-""}
+ZPOOL_LAYOUT_BUSES=${ZPOOL_LAYOUT_BUSES:-""}
+ZPOOL_LAYOUT_PORTS=${ZPOOL_LAYOUT_PORTS:-""}
+ZPOOL_LAYOUT_MAP=${ZPOOL_LAYOUT_MAP:-""}
+MOUNT_DELAY=${MOUNT_DELAY:-2}
+LOAD_ZFS=${LOAD_ZFS:-""}
+
+shopt -s nullglob
+
+start_zfs_services ()
+{
+ if [ -n "$ZPOOL_LAYOUT_BUSES" -a -n "$ZPOOL_LAYOUT_PORTS" ] ; then
+ MAP_ARG=${ZPOOL_LAYOUT_MAP:+"-m $ZPOOL_LAYOUT_MAP"}
+ $ZPOOL_LAYOUT -t -b "$ZPOOL_LAYOUT_BUSES" \
+ -p "$ZPOOL_LAYOUT_PORTS" $MAP_ARG
+ fi
+ if [ "$LOAD_ZFS" = "yes" ] && ! modprobe zfs ; then
+ echo "Failed to load zfs module. Aborting."
+ exit 1
+ fi
+}
+
+stop_devices ()
+{
+ local labels=$*
+ local result=0
+ local label devtype
+ for label in $labels; do
+ devtype=`$LDEV -t $label`
+ if [ "$devtype" = "zfs" ] ; then
+ export_zpool $label
+ elif [ "$devtype" = "md" ] ; then
+ dev=`label_to_device $label`
+ journal=`$LDEV -j $label`
+ stop_md_device $dev
+ stop_md_device $journal
+ fi
+ done
+}
+
+import_zpool ()
+{
+ local result=1
+ local label=$1
+ local pool=`$LDEV -z $label`
+ local args="-N $ZPOOL_IMPORT_ARGS"
+ local cache=`$LDEV -r $label`
+ # -c is incompatible with -d
+ if [ -n "$cache" ] ; then
+ args="$args -c $cache"
+ elif [ -n "$ZPOOL_IMPORT_DIR" ] ; then
+ args="$args -d $ZPOOL_IMPORT_DIR"
+ elif [ -d "/dev/disk/by-vdev" ] ; then
+ args="$args -d /dev/disk/by-vdev"
+ elif [ -d "/dev/mapper" ] ; then
+ args="$args -d /dev/mapper"
+ fi
+
+ if zpool status $pool >/dev/null 2>&1 ; then
+ result=0
+ elif [ -n "$pool" ] ; then
+ zpool import $pool $args 2>/dev/null
+ result=$?
+ fi
+ return $result
+}
+
+export_zpool ()
+{
+ local label=$1
+ local pool=`$LDEV -z $label`
+ zpool export $pool 2>/dev/null
+}
+
+# Trigger udev and wait for it to settle.
+udev_trigger()
+{
+ if [ -x ${UDEVADM} ]; then
+ ${UDEVADM} trigger --action=change --subsystem-match=block
+ ${UDEVADM} settle
+ else
+ /sbin/udevtrigger
+ /sbin/udevsettle
+ fi
+}
+
+# Usage: run_preexec_check [ start | restart | condrestart ]
+# The single parameter will be passed to the PREEXEC_SCRIPT
+run_preexec_check ()
+{
+ if [ -n "$PREEXEC_CHECK" ] && ! $PREEXEC_CHECK ; then
+ echo "Pre-exec check \"$PREEXEC_CHECK\" failed. Aborting."
+ exit 1
+ fi
+
+ if [ -n "$PREEXEC_SCRIPT" ] && ! "$PREEXEC_SCRIPT" "$1" ; then
+ echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed. Aborting."
+ exit 1
+ fi
+}
+
+# Usage: run_postexec_check [ start | restart | condrestart ]
+# The single parameter will be passed to the POSTEXEC_SCRIPT
+run_postexec_check ()
+{
+ if [ -n "$POSTEXEC_CHECK" ] && ! $POSTEXEC_CHECK ; then
+ echo "Post-exec check \"$POSTEXEC_CHECK\" failed. Aborting."
+ exit 1
+ fi
+
+ if [ -n "$POSTEXEC_SCRIPT" ] && ! "$POSTEXEC_SCRIPT" "$1" ; then
+ echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed. Aborting."
+ exit 1
+ fi
+}
+
+# Usage: adjust_scsi_timeout <dev>
+adjust_scsi_timeout ()
+{
+ local dev=$1
+
+ if [ -n "$SCSI_DEVICE_TIMEOUT" ]; then
+ # make sure that it is actually a SCSI (sd) device
+ local name=`basename $dev`
+ local proc=/sys/block/${name}/device/timeout
+ local driver=`readlink /sys/block/${name}/device/driver`
+ if [ -n "$driver" ] && [ "`basename $driver`" == "sd" ]; then
+ if ! echo $SCSI_DEVICE_TIMEOUT >$proc; then
+ echo "FAILED: could not adjust ${dev} timeout"
+ return 1
+ fi
+ fi
+ fi
+ return 0
+}
+
+# Usage: fsck_test <dev> [ <dev> ... ]
+# Checks all devices in parallel if FSCK_ARGS is set.
+fsck_test ()
+{
+ local devices="$*"
+ local rc
+ # Filter out non-absolute paths, which are probably ZFS datasets
+ devices=`echo $devices |xargs -n 1|grep '^/'|xargs`
+
+ if [ -n "${FSCK_ARGS}" ] && [ -n "$devices" ]; then
+ if [ -x /sbin/@PFSCK@ ] ; then
+ echo "@PFSCK@ $devices -- ${FSCK_ARGS}"
+ /sbin/@PFSCK@ $devices -- ${FSCK_ARGS}
+ rc=$?; if [ $rc -ne 0 ] && [ $rc -ne 1 ] ; then
+ echo "FAILED: @PFSCK@ -- ${FSCK_ARGS}: $rc"
+ return 1
+ fi
+ else
+ echo "/sbin/@PFSCK@ not found"
+ return 1
+ fi
+ fi
+ return 0
+}
+
+# Usage: test_feature_flag <dev> <flag>
+test_feature_flag()
+{
+ local dev=$1
+ local flag=$2
+ local result=1
+ local feature
+
+ for feature in `/sbin/@TUNE2FS@ -l $dev 2>/dev/null \
+ | grep features: | sed -e 's/^.*: //'`; do
+ if [ "$feature" == "$flag" ]; then
+ result=0
+ break
+ fi
+ done
+
+ return $result
+}
+
+# Usage: mmp_test <dev>
+# Returns 0 if it is set or not required, 1 if unset and required or error.
+mmp_test ()
+{
+ local dev=$1
+ local result=0
+
+ if [ "$REQUIRE_MMP_FEATURE" == "yes" ]; then
+ if [ -x /sbin/@TUNE2FS@ ]; then
+ if ! test_feature_flag $dev "mmp"; then
+ echo "mmp feature flag is not set on $dev"
+ result=1
+ fi
+ else
+ echo "/sbin/@TUNE2FS@ not found"
+ result=1
+ fi
+ fi
+
+ return $result
+}
+
+# Usage: label_to_mountpt <label>
+# Prints mount point path, if label matches a local or foreign server.
+label_to_mountpt ()
+{
+ local label=$1
+ local serv
+
+ for serv in $LOCAL_SRV; do
+ if [ "$serv" == "$label" ]; then
+ echo "$LOCAL_MOUNT_DIR/$label"
+ return
+ fi
+ done
+ for serv in $FOREIGN_SRV; do
+ if [ "$serv" == "$label" ]; then
+ echo "$FOREIGN_MOUNT_DIR/$label"
+ return
+ fi
+ done
+}
+
+# Usage: label_to_device <label>
+# Prints canonical device path.
+label_to_device ()
+{
+ local label=$1
+ local path=/dev/disk/by-label/$label
+
+ if [ -h $path ] ; then
+ readlink --canonicalize $path
+ else
+ $LDEV -d $label
+ fi
+}
+
+# helper for mountpt_is_active() and device_is_active()
+declare -r awkprog='BEGIN {rc = 1;}
+ { if ($field == path) {rc = 0;} }
+ END { exit rc;}'
+
+# Usage: mountpt_is_active <label>
+# Return 1 (inactive) on invalid label.
+mountpt_is_active ()
+{
+ local dir=`label_to_mountpt $1`
+ local result=1
+
+ if [ -n "$dir" ]; then
+ cat /proc/mounts | awk "$awkprog" field=2 path=$dir
+ result=$?
+ fi
+ return $result
+}
+
+# Usage: device_is_active <label>
+# Return 1 (inactive) on invalid label.
+device_is_active ()
+{
+ local dev=`label_to_device $1`
+ local result=1
+
+ if [ -n "$dev" ]; then
+ cat /proc/mounts | awk "$awkprog" field=1 path=$dev
+ result=$?
+ fi
+ return $result
+}
+
+# Usage: mount_one_device <label> <successflag> [devtype]
+# Remove <succesflag> on error (trick to detect errors after parallel runs).
+mount_one_device ()
+{
+ local label=$1
+ local successflag=$2 # flag file; removed to signal failure to the parent
+ local devtype=$3 # NOTE(review): unused in this body — confirm callers need it
+ local dev=`label_to_device $label`
+ local dir=`label_to_mountpt $label`
+
+ # $dir and $dev have already been checked at this point
+ if [ ! -d $dir ] && ! mkdir -p $dir; then
+ rm -f $successflag
+ return
+ fi
+ echo "Mounting $dev on $dir"
+ if ! mount -t lustre $MOUNT_OPTIONS $dev $dir; then
+ rm -f $successflag
+ return
+ fi
+}
+
+# Usage: assemble_md_device <device>
+# Assemble the md device backing device.
+# Return 0 if the array is assembled successfully or was already active,
+# otherwise return error code from mdadm.
+assemble_md_device ()
+{
+ local dev=$1
+ local raidtab=$2
+ local args="-Aq"
+ local result=0
+
+ if [ -n "$raidtab" ] ; then
+ args="$args -c $raidtab"
+ fi
+
+ if ! md_array_is_active $dev ; then
+ mdadm $args $dev
+ result=$?
+ fi
+
+ udev_trigger
+ return $result
+}
+
+# Usage: stop_md_device <device>
+# Stop the md device backing device.
+# Return 0 if the array is stopped successfully or was not active,
+# otherwise return error code from mdadm.
+stop_md_device ()
+{
+ local dev=$1
+ local raidtab=$2
+ local args="-Sq"
+ local result=0
+
+ if [ -n "$raidtab" ] ; then
+ args="$args -c $raidtab"
+ fi
+
+ if [ -e $dev ] && md_array_is_active $dev ; then
+ mdadm $args $dev
+ result=$?
+ fi
+
+ return $result
+}
+
+# Usage: md_array_is_active <device>
+# return 0 if device is an active md RAID array, or 1 otherwise
+md_array_is_active ()
+{
+ local device=$1
+
+ [ -e "$device" ] || return 1
+
+ mdadm --detail -t $device > /dev/null 2>&1
+ if [ $? -eq 4 ] ; then
+ return 1
+ fi
+ return 0
+}
+
+# Usage: start_services <label> [ <label> ... ]
+# fsck and mount any devices listed as arguments (in parallel).
+# Attempt to assemble software raid arrays or zfs pools backing
+# Lustre devices.
+start_services ()
+{
+ local result=0
+ local devices=""
+ local dir dev label
+ local successflag
+ local labels
+
+ start_zfs_services
+ for label in $*; do
+ dir=`label_to_mountpt $label`
+ devtype=`$LDEV -t $label`
+ dev=`label_to_device $label`
+ journal=`$LDEV -j $label`
+ raidtab=`$LDEV -r $label`
+
+ if [ -z "$dir" ] || [ -z "$dev" ]; then
+ echo "$label is not a valid lustre label on this node"
+ result=2
+ continue
+ fi
+
+ if [ "$devtype" = "md" ] ; then
+ if ! assemble_md_device $dev $raidtab ; then
+ echo "failed to assemble array $dev backing $label"
+ result=2
+ continue
+ fi
+ elif [ "$devtype" = "zfs" ] ; then
+ if ! import_zpool $label ; then
+ result=2
+ fi
+ fi
+
+ # Journal device field in ldev.conf may be "-" or empty,
+ # so only attempt to assemble if its an absolute path.
+ # Ignore errors since the journal device may not be an
+ # md device.
+ if echo $journal | grep -q ^/ ; then
+ assemble_md_device $journal $raidtab 2>/dev/null
+ fi
+
+ if [ "x$devtype" != "xzfs" ] ; then
+ if mountpt_is_active $label || \
+ device_is_active $label; then
+ echo "$label is already mounted"
+ # no error
+ continue
+ fi
+ if ! mmp_test $dev; then
+ result=2
+ continue
+ fi
+ if ! adjust_scsi_timeout $dev; then
+ result=2
+ continue
+ fi
+ fi
+ devices="$devices $dev"
+ labels="$labels $label"
+ done
+ if [ $result == 0 ]; then
+ fsck_test $devices || return 2
+
+ # Fork to handle multiple mount_one_device()'s in parallel.
+ # Errors occurred if $successflag comes up missing afterwards.
+ successflag=`mktemp`
+ [ -e $successflag ] || return 2
+ for label in $labels; do
+ mount_one_device $label $successflag `$LDEV -t $label` &
+ # stagger to avoid module loading races
+ if [[ -n $MOUNT_DELAY && $MOUNT_DELAY -gt 0 ]] ; then
+ sleep $MOUNT_DELAY
+ fi
+ done
+ for label in $labels; do
+ wait
+ done
+ [ -e $successflag ] || return 2
+ rm -f $successflag
+ fi
+
+ return $result
+}
+
+# Usage: stop_services <label> [ <label> ... ]
+# Unmount any devices listed as arguments (serially).
+# Any devices which are not mounted or don't exist are skipped with no error.
+stop_services ()
+{
+ local labels=$*
+ local result=0
+ local dir dev label
+
+ for label in $labels; do
+ dir=`label_to_mountpt $label`
+ if [ -z "$dir" ]; then
+ echo "$label is not a valid lustre label on this node"
+ result=2
+ continue
+ fi
+ if ! mountpt_is_active $label; then
+ #echo "$label is not mounted"
+ # no error
+ continue
+ fi
+ echo "Unmounting $dir"
+ umount $dir || result=2
+ done
+ # double check!
+ for label in $labels; do
+ if mountpt_is_active $label; then
+ dir=`label_to_mountpt $label`
+ echo "Mount point $dir is still active"
+ result=2
+ fi
+ if device_is_active $label; then
+ dev=`label_to_device $label`
+ echo "Device $dev is still active"
+ result=2
+ fi
+ done
+ stop_devices $labels
+
+ return $result
+}
+
+# Usage: start_lustre_services [local|foreign|all|<label>]
+# If no parameter is specified, local devices will be started.
+start_lustre_services ()
+{
+ local labels=""
+
+ case "$1" in
+ ""|local)
+ labels=$LOCAL_SRV
+ ;;
+ foreign)
+ labels=$FOREIGN_SRV
+ ;;
+ all) labels="$LOCAL_SRV $FOREIGN_SRV"
+ ;;
+ *) labels="$1"
+ ;;
+ esac
+ # for use by heartbeat V1 resource agent:
+ # starting an already-started service must not be an error
+ start_services $labels || exit 2
+}
+
+# Usage: stop_lustre_services [local|foreign|all|<label>]
+# If no parameter is specified all devices will be stopped.
+stop_lustre_services ()
+{
+ local labels=""
+
+ case "$1" in
+ local) labels=$LOCAL_SRV
+ ;;
+ foreign)
+ labels=$FOREIGN_SRV
+ ;;
+ ""|all) labels="$LOCAL_SRV $FOREIGN_SRV"
+ ;;
+ *) labels="$1"
+ ;;
+ esac
+ # for use by heartbeat V1 resource agent:
+ # stopping already-stopped service must not be an error
+ stop_services $labels || exit 2
+}
+
+# General lustre health check - not device specific.
+health_check ()
+{
+ old_nullglob="`shopt -p nullglob`"
+ shopt -u nullglob
+
+ STATE="stopped"
+ # LSB compliance - return 3 if service is not running
+ # Lustre-specific returns
+ # 150 - partial startup
+ # 151 - health_check unhealthy
+ # 152 - LBUG
+ RETVAL=3
+ egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
+
+ # check for any configured devices (may indicate partial startup)
+ if [ -d /proc/fs/lustre ]; then
+ if [ -n "`cat /proc/fs/lustre/devices 2> /dev/null`" ] ; then
+ STATE="partial"
+ RETVAL=150
+ fi
+
+ # check for either a server or a client filesystem
+ MDT="`ls /proc/fs/lustre/mdt/*/recovery_status 2> /dev/null`"
+ OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status \
+ 2> /dev/null`"
+ LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
+ if [ "$MDT" -o "$OST" -o "$LLITE" ]; then
+ STATE="running"
+ RETVAL=0
+ fi
+ else
+ # check if this is a router
+ if [ -d /proc/sys/lnet ]; then
+ ROUTER="`cat /proc/sys/lnet/routes | head -1 |
+ grep -i -c \"Routing enabled\"`"
+ if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
+ STATE="running"
+ RETVAL=0
+ fi
+ fi
+ fi
+
+ # check for server disconnections
+ DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
+ if [ -n "$DISCON" ] ; then
+ STATE="disconnected"
+ RETVAL=0
+ fi
+
+ # check for servers in recovery
+ if [ -n "$MDT$OST" ] && grep -q RECOV $MDT $OST ; then
+ STATE="recovery"
+ RETVAL=0
+ fi
+
+ # check for error in health_check
+ HEALTH="/proc/fs/lustre/health_check"
+ if [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH ; then
+ STATE="unhealthy"
+ RETVAL=1
+ fi
+
+ # check for LBUG
+ if [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH ; then
+ STATE="LBUG"
+ RETVAL=152
+ fi
+
+ echo $STATE
+ eval $old_nullglob
+ return $RETVAL
+}
+
+# Usage: status [local|foreign|all|<label>]
+# If no parameter is specified, general lustre health status will be reported.
+status ()
+{
+ local labels=""
+ local label dev
+ local valid_devs=0
+
+ case "$1" in
+ local) labels=$LOCAL_SRV;
+ ;;
+ foreign)
+ labels=$FOREIGN_SRV;
+ ;;
+ all) labels="$LOCAL_SRV $FOREIGN_SRV"
+ ;;
+ "") # ASSUMPTION: this is not the heartbeat res agent
+ health_check
+ exit $?
+ ;;
+ *) labels=$1
+ ;;
+ esac
+ # for use by heartbeat V1 resource agent:
+ # print "running" if *anything* is running.
+ for label in $labels; do
+ dev=`label_to_device $label` # a device path, not a mount point
+ if [ -z "$dev" ]; then
+ echo "$label is not a valid lustre label on this node"
+ # no error
+ continue
+ fi
+ valid_devs=1
+ if mountpt_is_active $label || device_is_active $label; then
+ echo "running"
+ exit 0
+ fi
+ done
+ [ $valid_devs == 1 ] && echo "stopped"
+ exit 3
+}
+
+usage ()
+{
+ cat <<EOF
+Usage: lustre {start|stop|status|restart|reload|condrestart}
+
+ lustre start [local|foreign|<label>]
+ lustre stop [local|foreign|<label>]
+ lustre status [local|foreign|<label>]
+EOF
+ exit 1
+}
+
+# See how we were called.
+case "$1" in
+ start)
+ if [ $# -gt 2 ] ; then
+ echo "ERROR: Too many arguments."
+ usage
+ fi
+ run_preexec_check "start"
+ start_lustre_services $2
+ run_postexec_check "start"
+ ;;
+ stop)
+ if [ $# -gt 2 ] ; then
+ echo "ERROR: Too many arguments."
+ usage
+ fi
+ run_preexec_check "stop"
+ stop_lustre_services $2
+ run_postexec_check "stop"
+ ;;
+ status)
+ if [ $# -gt 2 ] ; then
+ echo "ERROR: Too many arguments."
+ usage
+ fi
+ status $2
+ ;;
+ restart)
+ $0 stop
+ $0 start
+ ;;
+ reload)
+ ;;
+ probe)
+ ;;
+ condrestart)
+ if grep lustre /proc/mounts ; then
+ $0 stop
+ $0 start
+ fi
+ ;;
+ *)
+ usage
+esac
+
+exit 0