From: Ned Bass Date: Thu, 13 Oct 2011 17:56:03 +0000 (-0700) Subject: LU-107 Add scripts for implementing heartbeat v1 failover X-Git-Tag: 2.2.91~28 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=04a38ba7cda4e242850a47b13d4402a69e8dd921;ds=sidebyside LU-107 Add scripts for implementing heartbeat v1 failover /usr/sbin/ldev - list devices, determine validity, etc. /usr/sbin/lhbadm - wrapper for heartbeat utils for failover/failback/status /etc/ha.d/resource.d/Lustre - heartbeat resource agent (wraps init script) /etc/init.d/lustre - lustre init script /etc/init.d/lnet - lnet init script /usr/sbin/haconfig - helper script for building heartbeat config files The scripts use two configuration files: /etc/ldev.conf - maps hostnames to failover partners, devices, and labels /etc/nids - maps hostnames to lustre NIDs In addition to heartbeat support, the ldev script enables parallel execution of commands against all LUNs configured on a server. The lustre init script supports devices backed by Linux software RAID, ZFS, or traditional block devices. NOTE: these scripts presume the udev rules for persistent block device naming are in place, in particular that lustre labels can be mapped to block devices in /dev/disk/by-id. 
Change-Id: I8391744ce6eed989c061f131aca4a2da7b5d51b2 Signed-off-by: Ned Bass Signed-off-by: Brian Behlendorf Reviewed-on: http://review.whamcloud.com/290 Reviewed-by: Doug Oucharek Tested-by: Maloo Tested-by: Hudson Reviewed-by: Oleg Drokin --- diff --git a/lustre.spec.in b/lustre.spec.in index cd914af..a2e52fd 100644 --- a/lustre.spec.in +++ b/lustre.spec.in @@ -233,6 +233,13 @@ cat >lustre.files < start|stop|status +# where has the form "-targets" +# + +warn () +{ + if [ -e /etc/logd.cf ] && [ -x /usr/sbin/ha_logger ]; then + /usr/sbin/ha_logger -t heartbeat "Lustre: $*" + elif [ -x /usr/bin/logger ]; then + /usr/bin/logger -t heartbeat "Lustre: $*" + elif [ -x /bin/logger ]; then + /bin/logger -t heartbeat "Lustre: $*" + else + echo "Lustre: $*" + fi +} + +die () +{ + warn "$*" + exit 1 +} + + +if [ $# != 2 ]; then + die "wrong number of arguments: $*" +fi +if ! [ "$2" == "start" -o "$2" == "stop" -o "$2" == "status" ]; then + die "bad action arg[2]: $*" +fi + +if ! [ -x /usr/sbin/ldev ]; then + die "/usr/sbin/ldev is missing or not executable" +fi +if ! [ -x /etc/init.d/lustre ]; then + die "/etc/init.d/lustre is missing or not executable" +fi + +action=$2 +if [ "`uname -n`-targets" == "$1" ]; then + service=local +elif [ "`/usr/sbin/ldev -p`-targets" == "$1" ]; then + service=foreign +else + die: "bad service arg[1]: $*" +fi + +# Until multi-mount protect is implemented for ZFS we allow heartbeat to +# force import a pool. This is required because ZFS will not allow you to +# import a pool on a new host unless you have cleanly exported it. +export ZPOOL_IMPORT_ARGS='-f' + +# N.B. If status action reports "running", this must pass through to +# heartbeat unmodified. Otherwise, stdout/stderr is discarded by heartbeat, +# so if we want to log diagnostic output from init scripts, we have to +# redirect it here. + +warn /etc/init.d/lustre $action $service + +tmpout=`mktemp` || die "mktemp failed" +/etc/init.d/lustre $action $service >$tmpout +result=$? 
+cat $tmpout | while read line; do + echo "$line" + warn "$line" +done +rm -f $tmpout + +exit $result diff --git a/lustre/scripts/Makefile.am b/lustre/scripts/Makefile.am index 9b00685..19fc744 100644 --- a/lustre/scripts/Makefile.am +++ b/lustre/scripts/Makefile.am @@ -34,11 +34,17 @@ # Lustre is a trademark of Sun Microsystems, Inc. # -sbinscripts = lc_servip lustre_up14 lustre_rmmod +sbinscripts = lc_servip lustre_up14 lustre_rmmod lhbadm ldev # These are scripts that are generated from .in files genscripts = lustre_config lc_modprobe lc_net lc_hb lc_cluman lustre_createcsv \ - lc_md lc_lvm lustre_start + lc_md lc_lvm lustre_start lustre + +initdir = $(sysconfdir)/init.d +init_SCRIPTS = lustre lnet + +hadir = $(sysconfdir)/ha.d/resource.d +ha_SCRIPTS = Lustre sbin_SCRIPTS = $(genscripts) $(sbinscripts) bin_SCRIPTS = lustre_req_history lfs_migrate @@ -46,9 +52,11 @@ bin_SCRIPTS = lustre_req_history lfs_migrate EXTRA_DIST = license-status maketags.sh version_tag.pl version_tag-git.pl \ version_tag-cvs.pl version_tag-none.pl lc_common \ $(addsuffix .in,$(genscripts)) lc_mon $(sbinscripts) \ - $(bin_SCRIPTS) make_META.pl + $(bin_SCRIPTS) make_META.pl lustre.in lnet lhbadm \ + haconfig ldev Lustre scriptlibdir = @libexecdir@/@PACKAGE@ +scriptlib_SCRIPTS = haconfig scriptlib_DATA = lc_common CLEANFILES = $(genscripts) diff --git a/lustre/scripts/haconfig b/lustre/scripts/haconfig new file mode 100644 index 0000000..5869ea2 --- /dev/null +++ b/lustre/scripts/haconfig @@ -0,0 +1,21 @@ +#!/bin/bash + +# haconfig - config helper to process heartbeat V1 config skel files + +local=`uname -n` + +[ -x /usr/sbin/ldev ] || exit 0 +foreign=`/usr/sbin/ldev -p` +[ -n "$foreign" ] || exit 0 + + +umask 022 + +for file in /etc/ha.d/haresources /etc/ha.d/ha.cf; do + if [ -r ${file}.in ]; then + sed -e "s!@LOCAL@!$local!g" -e "s!@FOREIGN@!$foreign!g" \ + < ${file}.in >${file} + fi +done + +exit 0 diff --git a/lustre/scripts/ldev b/lustre/scripts/ldev new file mode 100644 index 
0000000..4b7b008 --- /dev/null +++ b/lustre/scripts/ldev @@ -0,0 +1,459 @@ +#!/usr/bin/perl +# +# ldev - parser for /etc/ldev.conf +# +use strict; +use File::Basename; +use Getopt::Long qw/ :config posix_default no_ignore_case/; + +$ENV{PATH} = "/sbin:/usr/sbin:/bin:/usr/bin"; + +my $prog = basename($0); + +my $usage = < "label used more than once", + epairwise => "local and foreign host not mapped to each other", + efieldcount => "line has less than the minimum number of fields (4)", + ekeyval => "malformed id=name", +); + +my %conf = (); + +# +# Main +# + +parse_cmdline (); + +parse_config (); + +sanity () if $conf{sanity}; +exec_cmd () if $conf{execcmd}; +query_partner () if $conf{partner}; +query_local () if $conf{local}; +query_foreign () if $conf{foreign}; +query_all () if $conf{all}; +query_device () if $conf{device}; +query_journal () if $conf{journal}; +query_raidtab () if $conf{raidtab}; +query_type () if $conf{type}; +query_zpool () if $conf{zpool}; + +exit(0); + +# +# Subroutines +# + +sub parse_cmdline +{ + my $help = 0; + my $host = ""; + + $conf{partner} = 0; + $conf{all} = 0; + $conf{local} = 0; + $conf{foreign} = 0; + $conf{config} = "/etc/ldev.conf"; + $conf{nidsfile} = "/etc/nids"; + $conf{hostname} = `uname -n`; chomp $conf{hostname}; + $conf{device} = ""; + $conf{sanity} = 0; + $conf{execcmd} = ""; + $conf{journal} = ""; + + my $rc = GetOptions ( + "help|h!" => \$help, + "partner|p!" => \$conf{partner}, + "all|a!" => \$conf{all}, + "local|l!" => \$conf{local}, + "foreign|f!" => \$conf{foreign}, + "config|c=s" => \$conf{config}, + "nidsfile|n=s" => \$conf{nidsfile}, + "hostname|H=s" => \$conf{hostname}, + "sanity|s!" => \$conf{sanity}, + "device|d=s" => \$conf{device}, + "journal|j=s" => \$conf{journal}, + "raidtab|r=s" => \$conf{raidtab}, + "type|t=s" => \$conf{type}, + "zpool|z=s" => \$conf{zpool}, + ); + + usage() if $help || !$rc; + + log_fatal ("cannot read config file\n") if (! -r $conf{config}); + + if (@ARGV) { + $conf{execcmd} = " " . 
join " ", @ARGV; + } + + parse_nids () if ($conf{execcmd} =~ /(%n|%N)/); +} + +sub parse_config +{ + my $line = 0; + my %l2f = (); + my %label2local = (); + my %label2dev = (); + my %label2journal = (); + my %label2raidtab = (); + my %label2type = (); + my %label2zpool = (); + my @local_labels = (); + my @foreign_labels = (); + + open (CONF, "< $conf{config}") or log_fatal ("$conf{config}: $!\n"); + + while () { + my $type; + $line++; + s/#.*//; + s/(\s)*$//; + next if (/^(\s)*$/); + chomp; + my ($local, $foreign, $label, $dev, $j, $raidtab) = split; + if ($dev !~ /^\// && $dev =~ /^([^:]+):(.+)$/) { + $type = $1; + $dev = $2; + } + eparse_line ($line, "efieldcount") if (!defined $dev); + eparse_line ($line, "epairwise") if (exists $l2f{$local} + && $l2f{$local} ne $foreign); + $l2f{$local} = $foreign; + + eparse_line ($line, "elabel_uniq") if (exists $label2dev{$label} + || exists $label2local{$label}); + $label2dev{$label} = $dev; + $label2local{$label} = $local; + $label2journal{$label} = $j if defined $j; + $label2raidtab{$label} = $raidtab if defined $raidtab; + if (defined $type) { + $label2type{$label} = $type; + if ($type eq "zfs" && $dev =~ m{^([^/]+)/[^/]+$}) { + $label2zpool{$label} = $1; + } + } + + if ($local eq $conf{hostname}) { + push @local_labels, $label; + } elsif ($foreign eq $conf{hostname}) { + push @foreign_labels, $label; + } + } + close CONF; + + foreach (keys %l2f) { + my $foreign = $l2f{$_}; + next if ($foreign eq "-"); + eparse_str ($_, "epairwise") + unless (!exists $l2f{$foreign} or $l2f{$foreign} eq $_); + } + + @{$conf{local_labels}} = @local_labels; + @{$conf{foreign_labels}} = @foreign_labels; + %{$conf{l2f}} = %l2f; + %{$conf{label2dev}} = %label2dev; + %{$conf{label2local}} = %label2local; + %{$conf{label2journal}} = %label2journal; + %{$conf{label2raidtab}} = %label2raidtab; + %{$conf{label2type}} = %label2type; + %{$conf{label2zpool}} = %label2zpool; +} + +sub parse_nids () +{ + my $line = 0; + my %host2nid = (); + my %nid2host 
= (); + + open (NIDS, "< $conf{nidsfile}") or log_fatal ("$conf{nidsfile}: $!\n"); + + while () { + $line++; + s/#.*//; + next if (/^(\s)*$/); + chomp; + my ($host, $nid, $morenids) = split (/\s+/, $_, 3); + if (!defined $nid) { + log_fatal ("$conf{nidsfile} line $line: incomplete line\n"); + } + $host2nid{$host} = $nid; + $nid2host{$nid} = $host; + map { $nid2host{$_} = $host; } split (/\s+/, $morenids); + } + close NIDS; + + %{$conf{host2nid}} = %host2nid; + %{$conf{nid2host}} = %nid2host; +} + +sub query_partner +{ + my %l2f = %{$conf{l2f}}; + my $hostname = $conf{hostname}; + if (exists $l2f{$hostname} && $l2f{$hostname} ne "-") { + print "$l2f{$hostname}\n"; + } +} + +sub query_local +{ + map { print "$_\n"; } @{$conf{local_labels}}; +} + +sub query_foreign +{ + map { print "$_\n"; } @{$conf{foreign_labels}}; +} + +sub query_all +{ + query_local (); + query_foreign (); +} + +sub query_device +{ + my %label2dev = %{$conf{label2dev}}; + + if (exists $label2dev{$conf{device}}) { + print "$label2dev{$conf{device}}\n"; + } +} + +sub query_raidtab +{ + my %label2raidtab = %{$conf{label2raidtab}}; + + if (exists $label2raidtab{$conf{raidtab}}) { + print "$label2raidtab{$conf{raidtab}}\n"; + } +} + +sub query_journal +{ + my %label2journal = %{$conf{label2journal}}; + + if (exists $label2journal{$conf{journal}} && + $label2journal{$conf{journal}} ne "-") { + print "$label2journal{$conf{journal}}\n"; + } +} + +sub query_type +{ + my %label2type = %{$conf{label2type}}; + + if (exists $label2type{$conf{type}}) { + print "$label2type{$conf{type}}\n"; + } +} + +sub query_zpool +{ + my %label2zpool = %{$conf{label2zpool}}; + + if (exists $label2zpool{$conf{zpool}}) { + print "$label2zpool{$conf{zpool}}\n"; + } +} + +sub dd_test +{ + my ($dpath) = @_; + my $retval = 0; + my $bs = `blockdev --getss $dpath 2>/dev/null`; chomp $bs; + my $max512 = `blockdev --getsize $dpath 2>/dev/null`; chomp $max512; + if ($? 
== 0 && $bs > 0 && $max512 > 0) { + my $maxb = ($max512 / $bs) * 512; + my $count = 10 * 1024 * 1024 / $bs; # read first 10mb + my $dev = `readlink -f $dpath`; chomp $dev; + $count = $maxb if ($count > $maxb); + `dd if=$dev of=/dev/null bs=$bs count=$count >/dev/null 2>&1`; + $retval = ($? == 0); + } + return $retval; +} + +sub sanity +{ + my $exit_val = 0; + + my @local_labels = @{$conf{local_labels}}; + my @foreign_labels = @{$conf{foreign_labels}}; + my %label2dev = %{$conf{label2dev}}; + my %label2journal = %{$conf{label2journal}}; + + foreach (@local_labels, @foreign_labels) { + my $lpath = "/dev/disk/by-label/$_"; + my $dpath = $label2dev{$_}; + my $jpath = $label2journal{$_}; + my $label = $_; + if (! -e $lpath) { + log_error ("$lpath does not exist\n"); + $exit_val = 1; + } + if (! -e $dpath) { + log_error ("$dpath does not exist\n"); + $exit_val = 1; + } elsif (!dd_test ($dpath)) { + log_error ("$dpath failed dd test\n"); + $exit_val = 1; + } + if (`readlink -f $lpath` ne `readlink -f $dpath`) { + log_error ("$lpath and $dpath point to different things\n"); + $exit_val = 1; + } + if ($jpath) { + if (! -e $jpath) { + log_error ("$jpath (journal for $label) does not exist\n"); + $exit_val = 1; + } elsif (!dd_test ($jpath)) { + log_error ("$jpath failed dd test\n"); + $exit_val = 1; + } + } + } + exit($exit_val); +} + +sub par_exec +{ + my @pids = (); + my %pid2label = (); + my %pid2cmd = (); + my $pid; + my $result = 0; + + my $tmpfile = `mktemp \${TMPDIR:-/tmp}/ldev.XXXXXXXXXX`; chomp $tmpfile; + log_fatal ("failed to create $tmpfile\n") if (! 
-e $tmpfile); + + foreach (@_) { + my ($label, $cmd) = split (/\s+/, $_, 2); + my ($basecmd) = split (/\s+/, $cmd); + if (($pid = fork)) { # parent + $pid2label{$pid} = $label; + $pid2cmd{$pid} = $basecmd; + } elsif (defined $pid) { # child + #print STDERR "$label: running $cmd\n"; + exec "($cmd 2>&1 || rm -f $tmpfile) | sed -e 's/^/$label: /'"; + print STDERR "$label: exec $basecmd: $!\n"; unlink $tmpfile; + } else { # error + log_fatal ("label: fork: $!\n"); unlink $tmpfile; + } + } + while (($pid = wait) != -1) { + #print STDERR "$pid2label{$pid}: completed\n"; + } + + # sentinel is intact, so there were no errors + if (-e $tmpfile) { + unlink $tmpfile; + $result = 1; + } + + return $result; +} + +sub exec_cmd +{ + my @labels = (); + my @cmds = (); + my %label2dev = %{$conf{label2dev}}; + my %label2journal = %{$conf{label2journal}}; + my %l2f = %{$conf{l2f}}; + my ($nid, $failnid); + + if ($conf{execcmd} =~ /%n/) { + my %host2nid = %{$conf{host2nid}}; + if (!defined $host2nid{$conf{hostname}}) { + log_fatal ("%n used but no nid defined for this host\n"); + } + $nid = $host2nid{$conf{hostname}}; + } + if ($conf{execcmd} =~ /%N/) { + if (!defined $l2f{$conf{hostname}}) { + log_fatal ("%N used but foreign host is undefined\n"); + } + my %host2nid = %{$conf{host2nid}}; + if (!defined $host2nid{$l2f{$conf{hostname}}}) { + log_fatal ("%N used but foreign nid is undefined\n"); + } + $failnid = $host2nid{$l2f{$conf{hostname}}}; + } + + if ($conf{foreign} and !$conf{local} and !$conf{all}) { + @labels = @{$conf{foreign_labels}}; + } elsif (!$conf{foreign} and !$conf{all}) { + @labels = @{$conf{local_labels}}; + } else { + @labels = (@{$conf{local_labels}}, @{$conf{foreign_labels}}); + } + foreach (@labels) { + /(\w+)-(OST|MDT|MGT)([0-9a-fA-F]{4})/; + + my $fsname = $1; + my $type = $2; $type =~ tr/A-Z/a-z/; + my $hexindex = $3; + my $decindex = hex($3); + my $label = $_; + my $cmd = $conf{execcmd}; + my $device = $label2dev{$_}; + if ($conf{execcmd} =~ /%j/ && !defined 
$label2journal{$_}) { + log_fatal ("%j used but no journal defined for $_\n"); + } + my $journal = $label2journal{$_}; + + $cmd =~ s/%f/$fsname/g; # %f = fsname + $cmd =~ s/%t/$type/g; # %t = server type + $cmd =~ s/%I/$hexindex/g;# %I = index (hex) + $cmd =~ s/%i/$decindex/g;# %i = index (dec) + $cmd =~ s/%l/$label/g; # %l = label + $cmd =~ s/%d/$device/g; # %d = device + $cmd =~ s/%j/$journal/g; # %j = journal device + $cmd =~ s/%n/$nid/g; # %n = nid + $cmd =~ s/%N/$failnid/g; # %N = failnid + + push @cmds, "$_ $cmd"; + } + + par_exec (@cmds) or log_fatal ("parallel command execution failed\n"); + exit 0; +} + +sub usage +{ + print STDERR "$usage"; + exit 0; +} + +sub log_msg { print STDERR "$prog: ", @_; } +sub log_error { log_msg ("Error: ", @_) } +sub log_fatal { log_msg ("Fatal: ", @_); exit 1; } +sub eparse_line { log_fatal ("$conf{config} line $_[0]: $eparse{$_[1]}\n"); } +sub eparse_str { log_fatal ("$conf{config}: $_[0]: $eparse{$_[1]}\n"); } diff --git a/lustre/scripts/lhbadm b/lustre/scripts/lhbadm new file mode 100644 index 0000000..e8a0a0a --- /dev/null +++ b/lustre/scripts/lhbadm @@ -0,0 +1,137 @@ +#!/bin/bash + +# lhbadm - handle some common heartbeat/lustre failover ops + +PATH=/sbin:/usr/sbin:/usr/bin:$PATH:/usr/lib64/heartbeat:/usr/lib/heartbeat + +declare -r prog=lhbadm + +die () +{ + echo "$prog: $@" + exit 1 +} + +warn () +{ + echo "$prog: $@" +} + +usage () +{ + echo "Usage: $prog status|lstatus|failback|failover" + echo " status - print one-line heartbeat-lustre status" + echo " failover - fail all my active resources over to partner" + echo " failback - fail my normal resources back" + exit 1 +} + +test_mounts () +{ + local label + local lcount=0 + local fcount=0 + local ltot=0 + local ftot=0 + + for label in $(ldev -l); do + ltot=$((ltot + 1)) + if [ "$(service lustre status $label)" == "running" ]; then + lcount=$((lcount + 1)) + fi + done + for label in $(ldev -f); do + ftot=$((ftot+ 1)) + if [ "$(service lustre status $label)" == 
"running" ]; then + fcount=$((fcount + 1)) + fi + done + + if [ $(($lcount + $fcount)) == 0 ]; then + echo none + elif [ $lcount == $ltot -a $fcount == 0 ]; then + echo local + elif [ $lcount == 0 -a $fcount == $ftot ]; then + echo foreign + elif [ $lcount == $ltot -a $fcount == $ftot ]; then + echo all + else + echo partial + fi +} + +status () +{ + local rstat fstat + local labels + + rstat=$(cl_status rscstatus) || die "cl_status rscstatus failed" + fstat=$(service lustre status) + + if [ "$fstat" == "running" ]; then + fstat=$(test_mounts) + fi + + echo $rstat-$fstat +} + +wait_for_transition () +{ + while sleep 5; do + state=$(cl_status rscstatus) || die "cl_status rscstatus failed" + [ "$state" == "transition" ] || break + done +} + +failover () +{ + local s + + [ "$(id -un)" == "root" ] || die "failover requires root privileges" + [ $# -gt 0 ] || die "please include a descriptive reason for the logs" + + s=$(status) + logger -s -t Lustre-ha -p user.err "failover start, status=$s, reason: $*" + + hb_standby all 2>/dev/null 1>&2 || die "hb_standby all failed" + wait_for_transition + + s=$(status) + logger -s -t Lustre-ha -p user.err "failover complete, status=$s" +} + +failback () +{ + local s + + [ "$(id -un)" == "root" ] || die "failback requires root privileges" + [ $# -gt 0 ] || die "please include a descriptive reason for the logs" + + s=$(status) + logger -s -t Lustre-ha -p user.err "failback start, status=$s, reason: $*" + + hb_takeover local || die "hb_takeover local failed" + wait_for_transition + + s=$(status) + logger -s -t Lustre-ha -p user.err "failover complete, status=$s" +} + + +# +# MAIN +# + +[ $# == 0 ] && usage +[ -x /usr/bin/cl_status ] || die "Heartbeat is not installed" +hstat=$(cl_status hbstatus) || die "$hstat" + +case "$1" in + status) status ;; + lstatus) lstatus ;; + failback) shift; failback $*;; + failover) shift; failover $*;; + *) usage ;; +esac + +# vi: ts=4 sw=4 expandtab diff --git a/lustre/scripts/lnet b/lustre/scripts/lnet 
new file mode 100644 index 0000000..7033891 --- /dev/null +++ b/lustre/scripts/lnet @@ -0,0 +1,214 @@ +#!/bin/bash +# +# lnet This shell script takes care of starting and stopping +# the lnet (Lustre networking) services. +# +# chkconfig: - 59 76 +# description: Part of the lustre file system. +# probe: true +# config: /etc/sysconfig/lustre + +# Source function library. +[ -f /etc/rc.d/init.d/functions ] && . /etc/rc.d/init.d/functions + +# Source networking configuration and check that networking is up. +[ -f /etc/sysconfig/network ] && . /etc/sysconfig/network && \ +[ "${NETWORKING}" = "no" ] && exit 0 + +# Check for and source configuration file otherwise set defaults +[ -f /etc/sysconfig/lnet ] && . /etc/sysconfig/lnet + +declare -r TOP_MODULES=( \ + obdecho \ + llite \ + lustre \ + osc \ + lov \ + mds \ + mdc \ + mgs \ + mgc \ + ost \ + obdfilter \ + lquota \ + ptlrpc \ +) +declare -r BOTTOM_MODULES=( \ + ksocklnd \ + kqswlnd \ + ko2iblnd \ + fsfilt_ldiskfs \ + obdclass \ + lnet \ + lvfs \ + libcfs \ + ldiskfs \ +) + +declare -r awkprog='BEGIN { rc = -1 } + { if ( $1 == module_name ) { rc = $3; exit; } } + END { print rc }' + +# Usage: run_preexec_check [ start | restart | condrestart ] +# The single parameter will be passed to the PREEXEC_SCRIPT +run_preexec_check () +{ + if [ -n "$PREEXEC_CHECK" ] && ! $PREEXEC_CHECK ; then + echo "Pre-exec check \"$PREEXEC_CHECK\" failed. Aborting." + exit 1 + fi + + if [ -n "$PREEXEC_SCRIPT" ] && ! "$PREEXEC_SCRIPT" "$1" ; then + echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed. Aborting." + exit 1 + fi +} + +# Usage: run_postexec_check [ start | restart | condrestart ] +# The single parameter will be passed to the POSTEXEC_SCRIPT +run_postexec_check () +{ + if [ -n "$POSTEXEC_CHECK" ] && ! $POSTEXEC_CHECK ; then + echo "Post-exec check \"$POSTEXEC_CHECK\" failed. Aborting." + exit 1 + fi + + if [ -n "$POSTEXEC_SCRIPT" ] && ! "$POSTEXEC_SCRIPT" "$1" ; then + echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed. Aborting." 
+ exit 1 + fi +} + +remove_modules () +{ + local modules="${@}" + local ref_cnt + + for mod in $modules; do + ref_cnt=`/sbin/lsmod | awk "$awkprog" "module_name=$mod"` + if [ $ref_cnt -lt 0 ]; then + # module not loaded, skip it + continue + fi + if [ $ref_cnt -gt 0 ]; then + # module in use. maybe it just needs a few seconds + # after removal of previous modules. + sleep 5 + ref_cnt=`/sbin/lsmod | awk "$awkprog" module_name=$mod` + fi + if [ $ref_cnt -eq 0 ]; then + # unload the module + echo "Removing module $mod" + /sbin/rmmod $mod + if [ $? -ne 0 ]; then + echo "ERROR: Failed to remove module $mod." + return 1 + fi + else + # boo! module still in use. + echo "ERROR: Module $mod has non-zero reference count." + return 1 + fi + done + + return 0 +} + +stop_lnet () +{ + local errmsg=`/usr/sbin/lctl network unconfigure 2>&1` + if [ $? -gt 0 ]; then + # The following error message means that lnet is already + # unconfigured, and the modules are not loaded. + echo $errmsg | grep "LNET unconfigure error 19" > /dev/null + if [ $? -gt 0 ]; then + return 0 + else + echo "$errmsg" + return 1 + fi + fi + return 0 +} + +status () +{ + old_nullglob="`shopt -p nullglob`" + shopt -u nullglob + + STATE="stopped" + # LSB compliance - return 3 if service is not running + # Lustre-specific returns + # 150 - partial startup + # 151 - health_check unhealthy + # 152 - LBUG + RETVAL=3 + egrep -q "lnet" /proc/modules && STATE="loaded" + + # check for any routes - on a portals router this is the only thing + [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0 + + # check if this is a router + if [ -d /proc/sys/lnet ]; then + ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`" + if [[ ! 
-z ${ROUTER} && ${ROUTER} -ge 1 ]]; then + STATE="running" + RETVAL=0 + fi + fi + + # check for error in health_check + HEALTH="/proc/fs/lustre/health_check" + [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=1 + + # check for LBUG + [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152 + + echo $STATE + eval $old_nullglob +} + +# See how we were called. +case "$1" in + start) + run_preexec_check "start" + touch /var/lock/subsys/lnet + modprobe lnet || exit 1 + lctl network up || exit 1 + run_postexec_check "start" + ;; + stop) + run_preexec_check "stop" + remove_modules ${TOP_MODULES[*]} || exit 1 + stop_lnet || exit 1 + remove_modules ${BOTTOM_MODULES[*]} || exit 1 + rm -f /var/lock/subsys/lnet + run_postexec_check "stop" + ;; + status) + status + ;; + restart) + $0 stop + $0 start + ;; + reload) + touch /var/lock/subsys/lnet + ;; + probe) + if [ ! -f /var/lock/subsys/lnet ] ; then + echo $"start"; exit 0 + fi + ;; + condrestart) + [ -f /var/lock/subsys/lnet ] && { + $0 stop + $0 start + } + ;; + *) + echo $"Usage: lustre {start|stop|status|restart|reload|condrestart}" + exit 1 +esac + +exit 0 diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre deleted file mode 100755 index 73c5b22..0000000 --- a/lustre/scripts/lustre +++ /dev/null @@ -1,243 +0,0 @@ -#!/bin/sh -# -# lustre This shell script takes care of starting and stopping Lustre -# -# chkconfig: - 99 1 -# description: Lustre Lite network File System. -# This starts both Lustre client and server functions. -# processname: lconf -# config: /etc/lustre/config.xml -# pidfile: /var/run/lustre.pid -### BEGIN INIT INFO -# Provides: lustre -# Required-Start: $network +sshd -# Required-Stop: $network -# Should-Start: -# Should-Stop: -# Default-Start: -# Default-Stop: 0 1 2 3 4 5 6 -# Short-Description: Lustre Lite network File System. -# Description: This starts both Lustre client and server functions. 
-### END INIT INFO - - -SERVICE=${0##*/} - -: ${LUSTRE_CFG:=/etc/lustre/lustre.cfg} -[ -f ${LUSTRE_CFG} ] && . ${LUSTRE_CFG} -[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre - -: ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml} -: ${LCONF:=lconf} -: ${LCTL:=lctl} -# Some distros use modprobe.conf.local -if [ -f /etc/modprobe.conf.local ]; then - : ${MODPROBE_CONF:=/etc/modprobe.conf.local} -else - : ${MODPROBE_CONF:=/etc/modprobe.conf} -fi -# Be sure the proper directories are in PATH. -export PATH="/sbin:$PATH" - -case "$SERVICE" in - [SK][[:digit:]][[:digit:]]lustre | lustre) - SERVICE="lustre" - : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"} - : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"} - ;; - *) - : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"} - : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"} - ;; -esac -LOCK=/var/lock/subsys/$SERVICE - -# Source function library. -if [ -f /etc/init.d/functions ] ; then - . /etc/init.d/functions -fi - -# Source networking configuration. -if [ -f /etc/sysconfig/network ] ; then - . /etc/sysconfig/network -fi - -check_start_stop() { - # Exit codes now LSB compliant - # Check that networking is up. - exit 'not running' - [ "${NETWORKING}" = "no" ] && exit 7 - - # exit 'not installed' - [ -x ${LCONF} -a -x ${LCTL} ] || exit 5 - - if [ ${LUSTRE_CONFIG_XML:0:1} = "/" ] ; then - if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then - echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping." - # exit 'not configured' - exit 6 - fi - fi - - # Create /var/lustre directory - # This is used by snmp agent for checking lustre services - # status online/offline/online pending/offline pending. - - [ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR - STATUS=${STATUS_DIR}/sysStatus -} - -start() { - if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then - if [ ! 
-f "/etc/lustre/start-despite-clumanager" ] ; then - cat >&2 <$STATUS - else - echo "online pending" >$STATUS - fi -} - -stop() { - check_start_stop - echo -n "Shutting down $SERVICE: " - if [ $UID -ne 0 ]; then - echo "Lustre should be stopped as root" - RETVAL=4 # insufficent privileges - return - fi - # Cat the modprobe file and place all lines that follow a trailing backslash on the same line -+ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"` - if [[ ! -z ${ROUTER} ]]; then - MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet` - if [[ ! -z ${MODULE_LOADED} ]]; then - ${LCTL} network unconfigure - fi - ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 - # do it again, in case we tried to unload ksocklnd too early - ${LCTL} modules | awk '{ print $2 }' | xargs rmmod - - else - ${LCONF} ${LCONF_STOP_ARGS} - fi - RETVAL=$? - echo $SERVICE - rm -f $LOCK - if [ $RETVAL -eq 0 ]; then - echo "offline" >$STATUS - else - echo "offline pending" >$STATUS - fi -} - -restart() { - stop - start -} - -status() { - STATE="stopped" - # LSB compliance - return 3 if service is not running - # Lustre-specific returns - # 150 - partial startup - # 151 - health_check unhealthy - # 152 - LBUG - RETVAL=3 - egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded" - - # check for any routes - on a portals router this is the only thing - [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0 - - # check for any configured devices (may indicate partial startup) - if [ -d /proc/fs/lustre ]; then - [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150 - - # check for either a server or a client filesystem - MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`" - OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`" - LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`" - [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && 
RETVAL=0 - else - # check if this is a router - if [ -d /proc/sys/lnet ]; then - ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`" - if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then - STATE="running" - RETVAL=0 - fi - fi - fi - - # check for server disconnections - DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`" - [ "$DISCON" ] && STATE="disconnected" && RETVAL=0 - - # check for servers in recovery - [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery" && RETVAL=0 - - # check for error in health_check - HEALTH="/proc/fs/lustre/health_check" - [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=151 - - # check for LBUG - [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152 - - # If Lustre is up , check if the service really exists - # Skip this is we are not checking a specific service - if [ $RETVAL -eq 0 ] && [ $SERVICE != 'lustre' ]; then - DUMMY=$( $LCTL dl | grep "$SERVICE") - [ $? -ne 0 ] && STATE="not_found" && RETVAL=3 - fi - - echo $STATE -} - -# See how we were called. -case "$1" in - start) - start - ;; - stop) - stop - ;; - restart) - restart - ;; - status) - status $SERVICE - ;; - *) - echo "Usage: $SERVICE {start|stop|restart|status}" - exit 1 -esac - -exit $RETVAL diff --git a/lustre/scripts/lustre.in b/lustre/scripts/lustre.in new file mode 100644 index 0000000..ea8ac39 --- /dev/null +++ b/lustre/scripts/lustre.in @@ -0,0 +1,740 @@ +#!/bin/bash +# +# lustre This shell script takes care of starting and stopping +# the lustre services. +# +# chkconfig: - 60 20 +# description: Part of the lustre file system. +# probe: true +# config: /etc/sysconfig/lustre + +# Source function library. +. /etc/rc.d/init.d/functions + +# Source networking configuration. +if [ ! -f /etc/sysconfig/network ]; then + exit 0 +fi + +. 
/etc/sysconfig/network + +LDEV=${LDEV:-"/usr/sbin/ldev"} +ZPOOL_LAYOUT=/usr/bin/zpool_layout +UDEVADM=${UDEVADM:-/sbin/udevadm} + +# Check that networking is up. +[ "${NETWORKING}" = "no" ] && exit 0 + +# Check for and source configuration file otherwise set defaults +[ -f /etc/sysconfig/lustre ] && . /etc/sysconfig/lustre +FSCK_ARGS=${FSCK_ARGS:-""} +MOUNT_OPTIONS=${MOUNT_OPTIONS:-""} +LOCAL_SRV=${LOCAL_SRV:-"`$LDEV -l 2>/dev/null`"} +FOREIGN_SRV=${FOREIGN_SRV:-"`$LDEV -f 2>/dev/null`"} +REQUIRE_MMP_FEATURE=${REQUIRE_MMP_FEATURE:-${FOREIGN_SRV:+"yes"}} +LOCAL_MOUNT_DIR=${LOCAL_MOUNT_DIR:-"/mnt/lustre/local"} +FOREIGN_MOUNT_DIR=${FOREIGN_MOUNT_DIR:-"/mnt/lustre/foreign"} +SETUP_DEVICES=${SETUP_DEVICES:-""} +ZPOOL_LAYOUT_BUSES=${ZPOOL_LAYOUT_BUSES:-""} +ZPOOL_LAYOUT_PORTS=${ZPOOL_LAYOUT_PORTS:-""} +ZPOOL_LAYOUT_MAP=${ZPOOL_LAYOUT_MAP:-""} +MOUNT_DELAY=${MOUNT_DELAY:-2} +LOAD_ZFS=${LOAD_ZFS:-""} + +shopt -s nullglob + +start_zfs_services () +{ + if [ -n "$ZPOOL_LAYOUT_BUSES" -a -n "$ZPOOL_LAYOUT_PORTS" ] ; then + MAP_ARG=${ZPOOL_LAYOUT_MAP:+"-m $ZPOOL_LAYOUT_MAP"} + $ZPOOL_LAYOUT -t -b "$ZPOOL_LAYOUT_BUSES" \ + -p "$ZPOOL_LAYOUT_PORTS" $MAP_ARG + fi + if [ "$LOAD_ZFS" = "yes" ] && ! modprobe zfs ; then + echo "Failed to load zfs module. Aborting." 
+ exit 1 + fi +} + +stop_devices () +{ + local labels=$* + local result=0 + local label devtype + for label in $labels; do + devtype=`$LDEV -t $label` + if [ "$devtype" = "zfs" ] ; then + export_zpool $label + elif [ "$devtype" = "md" ] ; then + dev=`label_to_device $label` + journal=`$LDEV -j $label` + stop_md_device $dev + stop_md_device $journal + fi + done +} + +import_zpool () +{ + local result=1 + local label=$1 + local pool=`$LDEV -z $label` + local args="-N $ZPOOL_IMPORT_ARGS" + local cache=`$LDEV -r $label` + # -c is incompatible with -d + if [ -n "$cache" ] ; then + args="$args -c $cache" + elif [ -n "$ZPOOL_IMPORT_DIR" ] ; then + args="$args -d $ZPOOL_IMPORT_DIR" + elif [ -d "/dev/disk/by-vdev" ] ; then + args="$args -d /dev/disk/by-vdev" + elif [ -d "/dev/mapper" ] ; then + args="$args -d /dev/mapper" + fi + + if zpool status $pool >/dev/null 2>&1 ; then + result=0 + elif [ -n "$pool" ] ; then + zpool import $pool $args 2>/dev/null + result=$? + fi + return $result +} + +export_zpool () +{ + local label=$1 + local pool=`$LDEV -z $label` + zpool export $pool 2>/dev/null +} + +# Trigger udev and wait for it to settle. +udev_trigger() +{ + if [ -x ${UDEVADM} ]; then + ${UDEVADM} trigger --action=change --subsystem-match=block + ${UDEVADM} settle + else + /sbin/udevtrigger + /sbin/udevsettle + fi +} + +# Usage: run_preexec_check [ start | restart | condrestart ] +# The single parameter will be passed to the PREEXEC_SCRIPT +run_preexec_check () +{ + if [ -n "$PREEXEC_CHECK" ] && ! $PREEXEC_CHECK ; then + echo "Pre-exec check \"$PREEXEC_CHECK\" failed. Aborting." + exit 1 + fi + + if [ -n "$PREEXEC_SCRIPT" ] && ! "$PREEXEC_SCRIPT" "$1" ; then + echo "Pre-exec script \"$PREEXEC_SCRIPT\" failed. Aborting." + exit 1 + fi +} + +# Usage: run_postexec_check [ start | restart | condrestart ] +# The single parameter will be passed to the PREEXEC_SCRIPT +run_postexec_check () +{ + if [ -n "$POSTEXEC_CHECK" ] && ! 
$POSTEXEC_CHECK ; then + echo "Post-exec check \"$POSTEXEC_CHECK\" failed. Aborting." + exit 1 + fi + + if [ -n "$POSTEXEC_SCRIPT" ] && ! "$POSTEXEC_SCRIPT" "$1" ; then + echo "Post-exec script \"$POSTEXEC_SCRIPT\" failed. Aborting." + exit 1 + fi +} + +# Usage: adjust_scsi_timeout +adjust_scsi_timeout () +{ + local dev=$1 + + if [ -n "$SCSI_DEVICE_TIMEOUT" ]; then + # make sure that it is actually a SCSI (sd) device + local name=`basename $dev` + local proc=/sys/block/${name}/device/timeout + local driver=`readlink /sys/block/${name}/device/driver` + if [ -n "$driver" ] && [ "`basename $driver`" == "sd" ]; then + if ! echo $SCSI_DEVICE_TIMEOUT >$proc; then + echo "FAILED: could not adjust ${dev} timeout" + return 1 + fi + fi + fi + return 0 +} + +# Usage: fsck_test [ ... ] +# Checks all devices in parallel if FSCK_ARGS is set. +fsck_test () +{ + local devices="$*" + + # Filter out non-absolute paths, which are probably ZFS datasets + devices=`echo $devices |xargs -n 1|grep '^/'|xargs` + + if [ -n "${FSCK_ARGS}" -a -n "$devices" ]; then + if [ -x /sbin/@PFSCK@ ] ; then + echo "@PFSCK@ $devices -- ${FSCK_ARGS}" + /sbin/@PFSCK@ $devices -- ${FSCK_ARGS} + if [ $? -ne 0 -a $? -ne 1 ] ; then + echo "FAILED: @PFSCK@ -- ${FSCK_ARGS}: $?" + return 1 + fi + else + echo "/sbin/@PFSCK@ not found" + return 1 + fi + fi + return 0 +} + +# Usage: test_feature_flag +test_feature_flag() +{ + local dev=$1 + local flag=$2 + local result=1 + local feature + + for feature in `/sbin/@TUNE2FS@ -l $dev 2>/dev/null \ + | grep features: | sed -e 's/^.*: //'`; do + if [ "$feature" == "$flag" ]; then + result=0 + break + fi + done + + return $result +} + +# Usage: mmp_test +# Returns 0 if it is set or not required, 1 if unset and required or error. +mmp_test () +{ + local dev=$1 + local result=0 + + if [ "$REQUIRE_MMP_FEATURE" == "yes" ]; then + if [ -x /sbin/@TUNE2FS@ ]; then + if ! 
test_feature_flag $dev "mmp"; then + echo "mmp feature flag is not set on $dev" + result=1 + fi + else + echo "/sbin/@TUNE2FS@ not found" + result=1 + fi + fi + + return $result +} + +# Usage: label_to_mountpt