LU-2181 tests: test failure on conf-sanity, subtest 23a

[fs/lustre-release.git] / lustre / tests / conf-sanity.sh
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh

index e0df8b8..b55c9d3 100644 (file)
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -13,15 +13,34 @@ ONLY=${ONLY:-"$*"}
  ALWAYS_EXCEPT="$CONF_SANITY_EXCEPT"
  # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
  
+is_sles11()                                            # LU-2181
+{      # Return 0 only when /etc/SuSE-release reports VERSION 11, PATCHLEVEL 2.
+       if [ -r /etc/SuSE-release ]
+       then
+               local vers=$(awk '/VERSION/ {print $3}' /etc/SuSE-release)
+               local patchlev=$(awk '/PATCHLEVEL/ {print $3}' \
+                       /etc/SuSE-release)      # a missing field counts as 0
+               if [ "${vers:-0}" -eq 11 ] && [ "${patchlev:-0}" -eq 2 ]
+               then
+                       return 0
+               fi
+       fi
+       return 1
+}
+
+if is_sles11; then                                     # LU-2181
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 23a 34b"
+fi
+
  if [ "$FAILURE_MODE" = "HARD" ]; then
         CONFIG_EXCEPTIONS="24a " && \
         echo "Except the tests: $CONFIG_EXCEPTIONS for FAILURE_MODE=$FAILURE_MODE, bug 23573" && \
         ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS"
  fi
  
-# LU-2059
-ALWAYS_EXCEPT="$ALWAYS_EXCEPT 5d 19b 21b 27a"
-
+# bug number for skipped test:
+# a tool to create lustre filesystem images
+ALWAYS_EXCEPT="32newtarball $ALWAYS_EXCEPT"
  
  SRCDIR=`dirname $0`
  PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
@@ -418,6 +437,9 @@ test_5d() {
         grep " $MOUNT " /etc/mtab && \
                 error false "unexpected entry in mtab before mount" && return 10
  
+       [ "$(facet_fstype ost1)" = "zfs" ] &&
+               skip "LU-2059: no local config for ZFS OSTs" && return
+
         local rc=0
         start_ost
         start_mds
@@ -657,6 +679,9 @@ test_19a() {
  run_test 19a "start/stop MDS without OSTs"
  
  test_19b() {
+       [ "$(facet_fstype ost1)" = "zfs" ] &&
+               skip "LU-2059: no local config for ZFS OSTs" && return
+
         start_ost || return 1
         stop_ost -f || return 2
  }
@@ -693,6 +718,9 @@ test_21a() {
  run_test 21a "start mds before ost, stop ost first"
  
  test_21b() {
+       [ "$(facet_fstype ost1)" = "zfs" ] &&
+               skip "LU-2059: no local config for ZFS OSTs" && return
+
          start_ost
         start_mds
          wait_osc_import_state mds ost FULL
@@ -958,6 +986,9 @@ test_26() {
  run_test 26 "MDT startup failure cleans LOV (should return errs)"
  
  test_27a() {
+       [ "$(facet_fstype ost1)" = "zfs" ] &&
+               skip "LU-2059: no local config for ZFS OSTs" && return
+
         start_ost || return 1
         start_mds || return 2
         echo "Requeue thread should have started: "
@@ -1149,202 +1180,439 @@ test_31() { # bug 10734
  }
  run_test 31 "Connect to non-existent node (shouldn't crash)"
  
-# Use these start32/stop32 fn instead of t-f start/stop fn,
-# for local devices, to skip global facet vars init
-stop32 () {
-       local facet=$1
-       shift
-       echo "Stopping local ${MOUNT%/*}/${facet} (opts:$@)"
-       umount -d $@ ${MOUNT%/*}/${facet}
-       losetup -a
-}
-
-start32 () {
-       local facet=$1
-       shift
-       local device=$1
-       shift
-       mkdir -p ${MOUNT%/*}/${facet}
-
-       echo "Starting local ${facet}: $@ $device ${MOUNT%/*}/${facet}"
-       mount -t lustre $@ ${device} ${MOUNT%/*}/${facet}
-       local RC=$?
-       if [ $RC -ne 0 ]; then
-               echo "mount -t lustre $@ ${device} ${MOUNT%/*}/${facet}"
-               echo "Start of ${device} of local ${facet} failed ${RC}"
-       fi
-       losetup -a
-       return $RC
-}
-
-cleanup_nocli32 () {
-       stop32 mds1 -f
-       stop32 ost1 -f
-       wait_exit_ST client
-}
-
-cleanup_32() {
-       trap 0
-       echo "Cleanup test_32 umount $MOUNT ..."
-       umount -f $MOUNT || true
-       echo "Cleanup local mds ost1 ..."
-       cleanup_nocli32
-       combined_mgs_mds || start_mgs
-       unload_modules_conf
-}
-
-test_32a() {
-       if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
-               skip "Only applicable to ldiskfs-based MDTs"
-               return
+#
+# This is not really a test but a tool to create new disk
+# image tarballs for the upgrade tests.
+#
+# Disk image tarballs should be created on single-node
+# clusters by running this test with default configurations
+# plus a few mandatory environment settings that are verified
+# at the beginning of the test.
+#
+test_32newtarball() {
+       local version
+       local dst=.
+       local src=/etc/rc.d
+       local tmp=$TMP/t32_image_create
+
+       if [ $FSNAME != t32fs -o $MDSCOUNT -ne 1 -o                                                             \
+                \( -z "$MDSDEV" -a -z "$MDSDEV1" \) -o $OSTCOUNT -ne 1 -o                      \
+                -z "$OSTDEV1" ]; then
+               error "Needs FSNAME=t32fs MDSCOUNT=1 MDSDEV1=<nonexistent_file>"        \
+                         "(or MDSDEV, in the case of b1_8) OSTCOUNT=1"                                 \
+                         "OSTDEV1=<nonexistent_file>"
         fi
  
-       client_only && skip "client only testing" && return 0
-       [ "$NETTYPE" = "tcp" ] || { skip "NETTYPE != tcp" && return 0; }
-       [ -z "$TUNEFS" ] && skip_env "No tunefs" && return 0
+       mkdir $tmp || {
+               echo "Found stale $tmp"
+               return 1
+       }
  
-       local DISK1_8=$LUSTRE/tests/disk1_8.tar.bz2
-       [ ! -r $DISK1_8 ] && skip_env "Cannot find $DISK1_8" && return 0
-       local tmpdir=$TMP/conf32a
-       mkdir -p $tmpdir
-       tar xjvf $DISK1_8 -C $tmpdir || \
-               { skip_env "Cannot untar $DISK1_8" && return 0; }
+       mkdir $tmp/src
+       tar cf - -C $src . | tar xf - -C $tmp/src
  
-       load_modules
-       $LCTL set_param debug="$PTLDEBUG"
+       formatall
  
-       $TUNEFS $tmpdir/mds || error "tunefs failed"
+       setupall
+       tar cf - -C $tmp/src . | tar xf - -C /mnt/$FSNAME
+       stopall
  
-       combined_mgs_mds || stop mgs
+       mkdir $tmp/img
  
-       # nids are wrong, so client wont work, but server should start
-       start32 mds1 $tmpdir/mds "-o loop,exclude=lustre-OST0000" && \
-               trap cleanup_32 EXIT INT || return 3
+       setupall
+       pushd /mnt/$FSNAME
+       ls -Rni --time-style=+%s >$tmp/img/list
+       find . ! -name .lustre -type f -exec sha1sum {} \; |
+               sort -k 2 >$tmp/img/sha1sums
+       popd
+       $LCTL get_param -n version | head -n 1 |
+               sed -e 's/^lustre: *//' >$tmp/img/commit
+       stopall
  
-       local UUID=$($LCTL get_param -n mdt.lustre-MDT0000.uuid)
-       echo MDS uuid $UUID
-       [ "$UUID" == "lustre-MDT0000_UUID" ] || error "UUID is wrong: $UUID"
+       pushd $tmp/src
+       find -type f -exec sha1sum {} \; | sort -k 2 >$tmp/sha1sums.src
+       popd
  
-       $TUNEFS --mgsnode=$HOSTNAME $tmpdir/ost1 || error "tunefs failed"
-       start32 ost1 $tmpdir/ost1 "-o loop" || return 5
-       UUID=$($LCTL get_param -n obdfilter.lustre-OST0000.uuid)
-       echo OST uuid $UUID
-       [ "$UUID" == "lustre-OST0000_UUID" ] || error "UUID is wrong: $UUID"
+       if ! diff -u $tmp/sha1sums.src $tmp/img/sha1sums; then
+               echo "Data verification failed"
+       fi
  
-       local NID=$($LCTL list_nids | head -1)
+       uname -r >$tmp/img/kernel
+       uname -m >$tmp/img/arch
  
-       echo "OSC changes should succeed:"
-       $LCTL conf_param lustre-OST0000.osc.max_dirty_mb=15 || return 7
-       $LCTL conf_param lustre-OST0000.failover.node=$NID || return 8
-       echo "ok."
+       mv ${MDSDEV1:-$MDSDEV} $tmp/img
+       mv $OSTDEV1 $tmp/img
  
-       echo "MDC changes should succeed:"
-       $LCTL conf_param lustre-MDT0000.mdc.max_rpcs_in_flight=9 || return 9
-       $LCTL conf_param lustre-MDT0000.failover.node=$NID || return 10
-       echo "ok."
+       version=$(sed -e 's/\(^[0-9]\+\.[0-9]\+\)\(.*$\)/\1/' $tmp/img/commit |
+                         sed -e 's/\./_/g')    # E.g., "1.8.7" -> "1_8"
+       dst=$(cd $dst; pwd)
+       pushd $tmp/img
+       tar cjvf $dst/disk$version-$(facet_fstype $SINGLEMDS).tar.bz2 -S *
+       popd
  
-       echo "LOV changes should succeed:"
-       $LCTL pool_new lustre.interop || return 11
-       $LCTL conf_param lustre-MDT0000.lov.stripesize=4M || return 12
-       echo "ok."
+       rm -r $tmp
+}
+#run_test 32newtarball "Create a new test_32 disk image tarball for this version"
  
-       cleanup_32
+#
+# The list of applicable tarballs is returned via the caller's
+# variable "tarballs".
+#
+t32_check() {
+       local node=$(facet_active_host $SINGLEMDS)
+       local r="do_node $node"
  
-       # mount a second time to make sure we didnt leave upgrade flag on
-       load_modules
-       $TUNEFS --dryrun $tmpdir/mds || error "tunefs failed"
+       if [ "$CLIENTONLY" ]; then
+               skip "Client-only testing"
+               exit 0
+       fi
  
-       combined_mgs_mds || stop mgs
+       if ! $r which $TUNEFS; then
+               skip_env "tunefs.lustre required on $node"
+               exit 0
+       fi
  
-       start32 mds1 $tmpdir/mds "-o loop,exclude=lustre-OST0000" && \
-               trap cleanup_32 EXIT INT || return 12
+       if [ -n "$($LCTL list_nids | grep -v '\(tcp\|lo\)[[:digit:]]*$')" ]; then
+               skip "LU-2200: Test cannot run over Infiniband"
+               exit 0
+       fi
  
-       cleanup_32
+       local IMGTYPE=$(facet_fstype $SINGLEMDS)
  
-       rm -rf $tmpdir || true  # true is only for TMP on NFS
-}
-run_test 32a "Upgrade from 1.8 (not live)"
+       tarballs=$($r find $RLUSTRE/tests -maxdepth 1 -name \'disk*-$IMGTYPE.tar.bz2\')
  
-test_32b() {
-       if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
-               skip "Only applicable to ldiskfs-based MDTs"
-               return
+       if [ -z "$tarballs" ]; then
+               skip "No applicable tarballs found"
+               exit 0
         fi
+}
  
-       client_only && skip "client only testing" && return 0
-       [ "$NETTYPE" = "tcp" ] || { skip "NETTYPE != tcp" && return 0; }
-       [ -z "$TUNEFS" ] && skip_env "No tunefs" && return
+t32_test_cleanup() {
+       local node=$(facet_active_host $SINGLEMDS)
+       local r="do_node $node"
+       local tmp=$TMP/t32
+       local rc=$?
  
-       local DISK1_8=$LUSTRE/tests/disk1_8.tar.bz2
-       [ ! -r $DISK1_8 ] && skip_env "Cannot find $DISK1_8" && return 0
-       local tmpdir=$TMP/conf32b
-       mkdir -p $tmpdir
-       tar xjvf $DISK1_8 -C $tmpdir || \
-               { skip_env "Cannot untar $DISK1_8" && return ; }
+       if $shall_cleanup_lustre; then
+               umount $tmp/mnt/lustre || rc=$?
+       fi
+       if $shall_cleanup_mdt; then
+               $r umount -d $tmp/mnt/mdt || rc=$?
+       fi
+       if $shall_cleanup_ost; then
+               $r umount -d $tmp/mnt/ost || rc=$?
+       fi
+       $r rm -rf $tmp || rc=$?
+       rm -rf $tmp || rc=$?
+       return $rc
+}
  
-       load_modules
-       $LCTL set_param debug="+config"
-       local NEWNAME=lustre
+t32_bits_per_long() {
+       # Print the width of "long", in bits, for the machine name in $1.
+       # Deliberately incomplete: unrecognized names print nothing.
+       local machine=$1
+       case $machine in
+               i*86)
+                       echo -n 32;;
+               ppc64|x86_64)
+                       echo -n 64;;
+       esac
+}
  
-       # writeconf will cause servers to register with their current nids
-       $TUNEFS --writeconf --erase-params \
-        --param mdt.identity_upcall=$L_GETIDENTITY \
-        --fsname=$NEWNAME $tmpdir/mds || error "tunefs failed"
-       combined_mgs_mds || stop mgs
+t32_reload_modules() {
+       local node=$1
+       local attempt
+       local unloaded
+       # Up to 20 unload attempts, 5 s apart; a failed leak check aborts.
+       for ((attempt = 0; attempt < 20; attempt++)); do
+               echo "Unloading modules on $node: Attempt $attempt"
+               unloaded=no
+               do_rpc_nodes $node $LUSTRE_RMMOD $(facet_fstype $SINGLEMDS) &&
+                       unloaded=yes
+               do_rpc_nodes $node check_mem_leak || return 1
+               if [ $unloaded = yes ]; then
+                       load_modules
+                       return 0
+               fi
+               sleep 5
+       done
+       echo "Unloading modules on $node: Given up"
+       return 1
+}
  
-       start32 mds1 $tmpdir/mds "-o loop" && \
-               trap cleanup_32 EXIT INT || return 3
+t32_wait_til_devices_gone() {
+       local node=$1           # node whose "lctl device_list" is polled
+       local devices           # line count of the device list output
+       local i=0
+       # Poll up to 20 times, 5 s apart: 0 once the list is empty, else 1.
+       echo wait for devices to go
+       while ((i < 20)); do
+               devices=$(do_rpc_nodes $node $LCTL device_list | wc -l)
+               echo $devices
+               ((devices == 0)) && return 0
+               sleep 5
+               i=$((i + 1))
+       done
+       echo "waiting for devices on $node: Given up"
+       return 1
+}
+
+t32_test() {
+       local tarball=$1
+       local writeconf=$2
+       local shall_cleanup_mdt=false
+       local shall_cleanup_ost=false
+       local shall_cleanup_lustre=false
+       local node=$(facet_active_host $SINGLEMDS)
+       local r="do_node $node"
+       local tmp=$TMP/t32
+       local img_commit
+       local img_kernel
+       local img_arch
+       local fsname=t32fs
+       local nid=$($r $LCTL list_nids | head -1)
+       local mopts
+       local uuid
+       local nrpcs_orig
+       local nrpcs
+       local list
+
+       trap 'trap - RETURN; t32_test_cleanup' RETURN
+
+       mkdir -p $tmp/mnt/lustre
+       $r mkdir -p $tmp/mnt/{mdt,ost}
+       $r tar xjvf $tarball -S -C $tmp || {
+               error_noexit "Unpacking the disk image tarball"
+               return 1
+       }
+       img_commit=$($r cat $tmp/commit)
+       img_kernel=$($r cat $tmp/kernel)
+       img_arch=$($r cat $tmp/arch)
+       echo "Upgrading from $(basename $tarball), created with:"
+       echo "  Commit: $img_commit"
+       echo "  Kernel: $img_kernel"
+       echo "    Arch: $img_arch"
+
+       $r $LCTL set_param debug="$PTLDEBUG"
+
+       $r $TUNEFS --dryrun $tmp/mdt || {
+               error_noexit "tunefs.lustre before mounting the MDT"
+               return 1
+       }
+       if [ "$writeconf" ]; then
+               mopts=loop,writeconf
+       else
+               mopts=loop,exclude=$fsname-OST0000
+       fi
  
-       local UUID=$($LCTL get_param -n mdt.${NEWNAME}-MDT0000.uuid)
-       echo MDS uuid $UUID
-       [ "$UUID" == "${NEWNAME}-MDT0000_UUID" ] || error "UUID is wrong: $UUID"
+       t32_wait_til_devices_gone $node
  
-       $TUNEFS  --writeconf --erase-params \
-        --mgsnode=$HOSTNAME --fsname=$NEWNAME $tmpdir/ost1 ||\
-           error "tunefs failed"
-       start32 ost1 $tmpdir/ost1 "-o loop" || return 5
-       UUID=$($LCTL get_param -n obdfilter.${NEWNAME}-OST0000.uuid)
-       echo OST uuid $UUID
-       [ "$UUID" == "${NEWNAME}-OST0000_UUID" ] || error "UUID is wrong: $UUID"
+       $r mount -t lustre -o $mopts $tmp/mdt $tmp/mnt/mdt || {
+               error_noexit "Mounting the MDT"
+               return 1
+       }
+       shall_cleanup_mdt=true
  
-       local NID=$($LCTL list_nids | head -1)
+       uuid=$($r $LCTL get_param -n mdt.$fsname-MDT0000.uuid) || {
+               error_noexit "Getting MDT UUID"
+               return 1
+       }
+       if [ "$uuid" != $fsname-MDT0000_UUID ]; then
+               error_noexit "Unexpected MDT UUID: \"$uuid\""
+               return 1
+       fi
  
-       echo "OSC changes should succeed:"
-       $LCTL conf_param ${NEWNAME}-OST0000.osc.max_dirty_mb=15 || return 7
-       $LCTL conf_param ${NEWNAME}-OST0000.failover.node=$NID || return 8
-       echo "ok."
+       $r $TUNEFS --dryrun $tmp/ost || {
+               error_noexit "tunefs.lustre before mounting the OST"
+               return 1
+       }
+       if [ "$writeconf" ]; then
+               mopts=loop,mgsnode=$nid,$writeconf
+       else
+               mopts=loop,mgsnode=$nid
+       fi
+       $r mount -t lustre -o $mopts $tmp/ost $tmp/mnt/ost || {
+               error_noexit "Mounting the OST"
+               return 1
+       }
+       shall_cleanup_ost=true
  
-       echo "MDC changes should succeed:"
-       $LCTL conf_param ${NEWNAME}-MDT0000.mdc.max_rpcs_in_flight=9 || return 9
-       $LCTL conf_param ${NEWNAME}-MDT0000.failover.node=$NID || return 10
-       echo "ok."
+       uuid=$($r $LCTL get_param -n obdfilter.$fsname-OST0000.uuid) || {
+               error_noexit "Getting OST UUID"
+               return 1
+       }
+       if [ "$uuid" != $fsname-OST0000_UUID ]; then
+               error_noexit "Unexpected OST UUID: \"$uuid\""
+               return 1
+       fi
  
-       echo "LOV changes should succeed:"
-       $LCTL pool_new ${NEWNAME}.interop || return 11
-       $LCTL conf_param ${NEWNAME}-MDT0000.lov.stripesize=4M || return 12
-       echo "ok."
+       $r $LCTL conf_param $fsname-OST0000.osc.max_dirty_mb=15 || {
+               error_noexit "Setting \"max_dirty_mb\""
+               return 1
+       }
+       $r $LCTL conf_param $fsname-OST0000.failover.node=$nid || {
+               error_noexit "Setting OST \"failover.node\""
+               return 1
+       }
+       $r $LCTL conf_param $fsname-MDT0000.mdc.max_rpcs_in_flight=9 || {
+               error_noexit "Setting \"max_rpcs_in_flight\""
+               return 1
+       }
+       $r $LCTL conf_param $fsname-MDT0000.failover.node=$nid || {
+               error_noexit "Setting MDT \"failover.node\""
+               return 1
+       }
+       $r $LCTL pool_new $fsname.interop || {
+               error_noexit "Setting \"interop\""
+               return 1
+       }
+       $r $LCTL conf_param $fsname-MDT0000.lov.stripesize=4M || {
+               error_noexit "Setting \"lov.stripesize\""
+               return 1
+       }
+
+       if [ "$writeconf" ]; then
+               mount -t lustre $nid:/$fsname $tmp/mnt/lustre || {
+                       error_noexit "Mounting the client"
+                       return 1
+               }
+               shall_cleanup_lustre=true
+               $LCTL set_param debug="$PTLDEBUG"
+
+               if $r test -f $tmp/sha1sums; then
+                       $r sort -k 2 $tmp/sha1sums >$tmp/sha1sums.orig
+                       pushd $tmp/mnt/lustre
+                       find ! -name .lustre -type f -exec sha1sum {} \; |
+                               sort -k 2 >$tmp/sha1sums || {
+                               error_noexit "sha1sum"
+                               return 1
+                       }
+                       popd
+                       if ! diff -ub $tmp/sha1sums.orig $tmp/sha1sums; then
+                               error_noexit "sha1sum verification failed"
+                               return 1
+                       fi
+               else
+                       echo "sha1sum verification skipped"
+               fi
  
-       # MDT and OST should have registered with new nids, so we should have
-       # a fully-functioning client
-       echo "Check client and old fs contents"
+               if $r test -f $tmp/list; then
+                       #
+                       # There is not a Test Framework API to copy files to or
+                       # from a remote node.
+                       #
+                       $r sort -k 6 $tmp/list >$tmp/list.orig
+                       pushd $tmp/mnt/lustre
+                       ls -Rni --time-style=+%s | sort -k 6 >$tmp/list || {
+                               error_noexit "ls"
+                               return 1
+                       }
+                       popd
+                       #
+                       # 32-bit and 64-bit clients use different algorithms to
+                       # convert FIDs into inode numbers.  Hence, remove the inode
+                       # numbers from the lists, if the original list was created
+                       # on an architecture with different number of bits per
+                       # "long".
+                       #
+                       if [ $(t32_bits_per_long $(uname -m)) !=                                                \
+                                $(t32_bits_per_long $img_arch) ]; then
+                               echo "Different number of bits per \"long\" from the disk image"
+                               for list in list.orig list; do
+                                       sed -i -e 's/^[0-9]\+[ \t]\+//' $tmp/$list
+                               done
+                       fi
+                       if ! diff -ub $tmp/list.orig $tmp/list; then
+                               error_noexit "list verification failed"
+                               return 1
+                       fi
+               else
+                       echo "list verification skipped"
+               fi
  
-       local device=`h2$NETTYPE $HOSTNAME`:/$NEWNAME
-       echo "Starting local client: $HOSTNAME: $device $MOUNT"
-       mount -t lustre $device $MOUNT || return 1
+               #
+               # When adding new data verification tests, please check for
+               # the presence of the required reference files first, like
+               # the "sha1sums" and "list" tests above, to avoid the need to
+               # regenerate every image for each test addition.
+               #
+
+               nrpcs_orig=$($LCTL get_param -n mdc.*.max_rpcs_in_flight) || {
+                       error_noexit "Getting \"max_rpcs_in_flight\""
+                       return 1
+               }
+               nrpcs=$((nrpcs_orig + 5))
+               $r $LCTL conf_param $fsname-MDT0000.mdc.max_rpcs_in_flight=$nrpcs || {
+                       error_noexit "Changing \"max_rpcs_in_flight\""
+                       return 1
+               }
+               wait_update $HOSTNAME "$LCTL get_param -n mdc.*.max_rpcs_in_flight"     \
+                           $nrpcs || {
+                       error_noexit "Verifying \"max_rpcs_in_flight\""
+                       return 1
+               }
+
+               umount $tmp/mnt/lustre || {
+                       error_noexit "Unmounting the client"
+                       return 1
+               }
+               shall_cleanup_lustre=false
+       else
+               $r umount -d $tmp/mnt/mdt || {
+                       error_noexit "Unmounting the MDT"
+                       return 1
+               }
+               shall_cleanup_mdt=false
+               $r umount -d $tmp/mnt/ost || {
+                       error_noexit "Unmounting the OST"
+                       return 1
+               }
+               shall_cleanup_ost=false
+
+               t32_reload_modules $node || {
+                       error_noexit "Reloading modules"
+                       return 1
+               }
+
+               # mount a second time to make sure we didnt leave upgrade flag on
+               $r $TUNEFS --dryrun $tmp/mdt || {
+                       error_noexit "tunefs.lustre before remounting the MDT"
+                       return 1
+               }
+               $r mount -t lustre -o loop,exclude=$fsname-OST0000 $tmp/mdt                     \
+                                $tmp/mnt/mdt || {
+                       error_noexit "Remounting the MDT"
+                       return 1
+               }
+               shall_cleanup_mdt=true
+       fi
+}
  
-       local old=$($LCTL get_param -n mdc.*.max_rpcs_in_flight)
-       local new=$((old + 5))
-       $LCTL conf_param ${NEWNAME}-MDT0000.mdc.max_rpcs_in_flight=$new
-       wait_update $HOSTNAME "$LCTL get_param -n mdc.*.max_rpcs_in_flight" $new || return 11
+test_32a() {
+       local tarballs
+       local tarball
+       local rc=0
  
-       [ "$(cksum $MOUNT/passwd | cut -d' ' -f 1,2)" == "94306271 1478" ] || return 12
-       echo "ok."
+       t32_check
+       for tarball in $tarballs; do
+               t32_test $tarball || rc=$?
+       done
+       return $rc
+}
+run_test 32a "Upgrade (not live)"
  
-       cleanup_32
+test_32b() {
+       local tarballs
+       local tarball
+       local rc=0
  
-       rm -rf $tmpdir || true  # true is only for TMP on NFS
+       t32_check
+       for tarball in $tarballs; do
+               t32_test $tarball writeconf || rc=$?
+       done
+       return $rc
  }
-run_test 32b "Upgrade from 1.8 with writeconf"
+run_test 32b "Upgrade with writeconf"
  
  test_33a() { # bug 12333, was test_33
          local rc=0
@@ -1398,7 +1666,7 @@ test_33b() {      # was test_34
  
          do_facet client dd if=/dev/zero of=$MOUNT/24 bs=1024k count=1
          # Drop lock cancelation reply during umount
-       #define OBD_FAIL_LDLM_CANCEL             0x304
+       #define OBD_FAIL_LDLM_CANCEL_NET                        0x304
          do_facet client lctl set_param fail_loc=0x80000304
          #lctl set_param debug=-1
          umount_client $MOUNT
@@ -1462,7 +1730,8 @@ test_35a() { # bug 12459
         log "Set up a fake failnode for the MDS"
         FAKENID="127.0.0.2"
         local device=$(do_facet $SINGLEMDS "lctl get_param -n devices" | awk '($3 ~ "mdt" && $4 ~ "MDT") { print $4 }' | head -1)
-       do_facet mgs $LCTL conf_param ${device}.failover.node=$FAKENID || return 4
+       do_facet mgs "$LCTL conf_param ${device}.failover.node=" \
+               "$(h2$NETTYPE $FAKENID)" || return 4
  
         log "Wait for RECONNECT_INTERVAL seconds (10s)"
         sleep 10
@@ -1516,8 +1785,8 @@ test_35b() { # bug 18674
         FAKENID="127.0.0.2"
         local device=$(do_facet $SINGLEMDS "$LCTL get_param -n devices" | \
                         awk '($3 ~ "mdt" && $4 ~ "MDT") { print $4 }' | head -1)
-       do_facet mgs "$LCTL conf_param ${device}.failover.node=$FAKENID" || \
-               return 1
+       do_facet mgs "$LCTL conf_param ${device}.failover.node=" \
+               "$(h2$NETTYPE $FAKENID)" || return 1
  
         local at_max_saved=0
         # adaptive timeouts may prevent seeing the issue
@@ -2650,9 +2919,6 @@ thread_sanity() {
          lassert 28 "$msg" '(($tstarted == $tmin && $tstarted == $tmax ))' || return $?
          cleanup
  
-        # Workaround a YALA bug where YALA expects that modules will remain
-        # loaded on the servers
-        LOAD_MODULES_REMOTE=false
          load_modules
          setup
          cleanup
@@ -2989,5 +3255,5 @@ fi
  
  cleanup_gss
  
-complete $(basename $0) $SECONDS
+complete $SECONDS
  exit_status