Whamcloud - gitweb
LU-11215 tests: replace "large_xattr" with "ea_inode"
[fs/lustre-release.git] / lustre / tests / test-framework.sh
index d27cb30..c7cd6d2 100755 (executable)
@@ -1,25 +1,40 @@
 #!/bin/bash
 
-trap 'print_summary && touch $TF_FAIL && \
-    echo "test-framework exiting on error"' ERR
+trap 'print_summary && print_stack_trace | tee $TF_FAIL && \
+    echo "$TESTSUITE: FAIL: test-framework exiting on error"' ERR
 set -e
-#set -x
 
 export LANG=en_US
 export REFORMAT=${REFORMAT:-""}
 export WRITECONF=${WRITECONF:-""}
 export VERBOSE=${VERBOSE:-false}
-export GSS=false
+export GSS=${GSS:-false}
+export GSS_SK=${GSS_SK:-false}
 export GSS_KRB5=false
 export GSS_PIPEFS=false
+export SHARED_KEY=${SHARED_KEY:-false}
+export SK_PATH=${SK_PATH:-/tmp/test-framework-keys}
+export SK_OM_PATH=$SK_PATH'/tmp-request-mount'
+export SK_MOUNTED=${SK_MOUNTED:-false}
+export SK_FLAVOR=${SK_FLAVOR:-ski}
+export SK_NO_KEY=${SK_NO_KEY:-true}
+export SK_UNIQUE_NM=${SK_UNIQUE_NM:-false}
+export SK_S2S=${SK_S2S:-false}
+export SK_S2SNM=${SK_S2SNM:-TestFrameNM}
+export SK_S2SNMCLI=${SK_S2SNMCLI:-TestFrameNMCli}
 export IDENTITY_UPCALL=default
 export QUOTA_AUTO=1
+export FLAKEY=${FLAKEY:-true}
 # specify environment variable containing batch job name for server statistics
 export JOBID_VAR=${JOBID_VAR:-"procname_uid"}  # or "existing" or "disable"
 
 #export PDSH="pdsh -S -Rssh -w"
 export MOUNT_CMD=${MOUNT_CMD:-"mount -t lustre"}
 export UMOUNT=${UMOUNT:-"umount -d"}
+
+export LSNAPSHOT_CONF="/etc/ldev.conf"
+export LSNAPSHOT_LOG="/var/log/lsnapshot.log"
+
 # sles12 umount has an issue with -d option
 [ -e /etc/SuSE-release ] && grep -w VERSION /etc/SuSE-release | grep -wq 12 && {
        export UMOUNT="umount"
@@ -30,7 +45,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
 . $LUSTRE/tests/functions.sh
 . $LUSTRE/tests/yaml.sh
 
-export LD_LIBRARY_PATH=${LUSTRE}/utils:${LD_LIBRARY_PATH}
+export LD_LIBRARY_PATH=${LUSTRE}/utils/.libs:${LUSTRE}/utils:${LD_LIBRARY_PATH}
 
 LUSTRE_TESTS_CFG_DIR=${LUSTRE_TESTS_CFG_DIR:-${LUSTRE}/tests/cfg}
 
@@ -139,60 +154,62 @@ init_test_env() {
        export FAIL_ON_SKIP_ENV=${FAIL_ON_SKIP_ENV:-false}
        export RPC_MODE=${RPC_MODE:-false}
        export DO_CLEANUP=${DO_CLEANUP:-true}
+       export KEEP_ZPOOL=${KEEP_ZPOOL:-false}
+       export CLEANUP_DM_DEV=false
 
-    export MKE2FS=$MKE2FS
-    if [ -z "$MKE2FS" ]; then
-        if which mkfs.ldiskfs >/dev/null 2>&1; then
-            export MKE2FS=mkfs.ldiskfs
-        else
-            export MKE2FS=mke2fs
-        fi
-    fi
+       export MKE2FS=$MKE2FS
+       if [ -z "$MKE2FS" ]; then
+               if which mkfs.ldiskfs >/dev/null 2>&1; then
+                       export MKE2FS=mkfs.ldiskfs
+               else
+                       export MKE2FS=mke2fs
+               fi
+       fi
 
-    export DEBUGFS=$DEBUGFS
-    if [ -z "$DEBUGFS" ]; then
-        if which debugfs.ldiskfs >/dev/null 2>&1; then
-            export DEBUGFS=debugfs.ldiskfs
-        else
-            export DEBUGFS=debugfs
-        fi
-    fi
+       export DEBUGFS=$DEBUGFS
+       if [ -z "$DEBUGFS" ]; then
+               if which debugfs.ldiskfs >/dev/null 2>&1; then
+                       export DEBUGFS=debugfs.ldiskfs
+               else
+                       export DEBUGFS=debugfs
+               fi
+       fi
 
-    export TUNE2FS=$TUNE2FS
-    if [ -z "$TUNE2FS" ]; then
-        if which tunefs.ldiskfs >/dev/null 2>&1; then
-            export TUNE2FS=tunefs.ldiskfs
-        else
-            export TUNE2FS=tune2fs
-        fi
-    fi
+       export TUNE2FS=$TUNE2FS
+       if [ -z "$TUNE2FS" ]; then
+               if which tunefs.ldiskfs >/dev/null 2>&1; then
+                       export TUNE2FS=tunefs.ldiskfs
+               else
+                       export TUNE2FS=tune2fs
+               fi
+       fi
 
-    export E2LABEL=$E2LABEL
-    if [ -z "$E2LABEL" ]; then
-        if which label.ldiskfs >/dev/null 2>&1; then
-            export E2LABEL=label.ldiskfs
-        else
-            export E2LABEL=e2label
-        fi
-    fi
+       export E2LABEL=$E2LABEL
+       if [ -z "$E2LABEL" ]; then
+               if which label.ldiskfs >/dev/null 2>&1; then
+                       export E2LABEL=label.ldiskfs
+               else
+                       export E2LABEL=e2label
+               fi
+       fi
 
-    export DUMPE2FS=$DUMPE2FS
-    if [ -z "$DUMPE2FS" ]; then
-        if which dumpfs.ldiskfs >/dev/null 2>&1; then
-            export DUMPE2FS=dumpfs.ldiskfs
-        else
-            export DUMPE2FS=dumpe2fs
-        fi
-    fi
+       export DUMPE2FS=$DUMPE2FS
+       if [ -z "$DUMPE2FS" ]; then
+               if which dumpfs.ldiskfs >/dev/null 2>&1; then
+                       export DUMPE2FS=dumpfs.ldiskfs
+               else
+                       export DUMPE2FS=dumpe2fs
+               fi
+       fi
 
-    export E2FSCK=$E2FSCK
-    if [ -z "$E2FSCK" ]; then
-        if which fsck.ldiskfs >/dev/null 2>&1; then
-            export E2FSCK=fsck.ldiskfs
-        else
-            export E2FSCK=e2fsck
-        fi
-    fi
+       export E2FSCK=$E2FSCK
+       if [ -z "$E2FSCK" ]; then
+               if which fsck.ldiskfs >/dev/null 2>&1; then
+                       export E2FSCK=fsck.ldiskfs
+               else
+                        export E2FSCK=e2fsck
+               fi
+       fi
 
        export RESIZE2FS=$RESIZE2FS
        if [ -z "$RESIZE2FS" ]; then
@@ -211,119 +228,142 @@ init_test_env() {
        export ZDB=${ZDB:-zdb}
        export PARTPROBE=${PARTPROBE:-partprobe}
 
-    #[ -d /r ] && export ROOT=${ROOT:-/r}
-    export TMP=${TMP:-$ROOT/tmp}
-    export TESTSUITELOG=${TMP}/${TESTSUITE}.log
-    export LOGDIR=${LOGDIR:-${TMP}/test_logs/$(date +%s)}
-    export TESTLOG_PREFIX=$LOGDIR/$TESTSUITE
+       #[ -d /r ] && export ROOT=${ROOT:-/r}
+       export TMP=${TMP:-$ROOT/tmp}
+       export TESTSUITELOG=${TMP}/${TESTSUITE}.log
+       export LOGDIR=${LOGDIR:-${TMP}/test_logs/$(date +%s)}
+       export TESTLOG_PREFIX=$LOGDIR/$TESTSUITE
 
-    export HOSTNAME=${HOSTNAME:-$(hostname -s)}
-    if ! echo $PATH | grep -q $LUSTRE/utils; then
-        export PATH=$LUSTRE/utils:$PATH
-    fi
-    if ! echo $PATH | grep -q $LUSTRE/utils/gss; then
-        export PATH=$LUSTRE/utils/gss:$PATH
-    fi
-    if ! echo $PATH | grep -q $LUSTRE/tests; then
-        export PATH=$LUSTRE/tests:$PATH
-    fi
-    if ! echo $PATH | grep -q $LUSTRE/../lustre-iokit/sgpdd-survey; then
-        export PATH=$LUSTRE/../lustre-iokit/sgpdd-survey:$PATH
-    fi
-    export LST=${LST:-"$LUSTRE/../lnet/utils/lst"}
-    [ ! -f "$LST" ] && export LST=$(which lst)
-    export SGPDDSURVEY=${SGPDDSURVEY:-"$LUSTRE/../lustre-iokit/sgpdd-survey/sgpdd-survey")}
-    [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey)
+       export HOSTNAME=${HOSTNAME:-$(hostname -s)}
+       if ! echo $PATH | grep -q $LUSTRE/utils; then
+               export PATH=$LUSTRE/utils:$PATH
+       fi
+       if ! echo $PATH | grep -q $LUSTRE/utils/gss; then
+               export PATH=$LUSTRE/utils/gss:$PATH
+       fi
+       if ! echo $PATH | grep -q $LUSTRE/tests; then
+               export PATH=$LUSTRE/tests:$PATH
+       fi
+       if ! echo $PATH | grep -q $LUSTRE/../lustre-iokit/sgpdd-survey; then
+               export PATH=$LUSTRE/../lustre-iokit/sgpdd-survey:$PATH
+       fi
+       export LST=${LST:-"$LUSTRE/../lnet/utils/lst"}
+       [ ! -f "$LST" ] && export LST=$(which lst)
+       export SGPDDSURVEY=${SGPDDSURVEY:-"$LUSTRE/../lustre-iokit/sgpdd-survey/sgpdd-survey")}
+       [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey)
        export MCREATE=${MCREATE:-mcreate}
-    # Ubuntu, at least, has a truncate command in /usr/bin
-    # so fully path our truncate command.
-    export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate}
+       export MULTIOP=${MULTIOP:-multiop}
+       # Ubuntu, at least, has a truncate command in /usr/bin
+       # so fully path our truncate command.
+       export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate}
        export FSX=${FSX:-$LUSTRE/tests/fsx}
-    export MDSRATE=${MDSRATE:-"$LUSTRE/tests/mpi/mdsrate"}
-    [ ! -f "$MDSRATE" ] && export MDSRATE=$(which mdsrate 2> /dev/null)
-    if ! echo $PATH | grep -q $LUSTRE/tests/racer; then
-        export PATH=$LUSTRE/tests/racer:$PATH:
-    fi
-    if ! echo $PATH | grep -q $LUSTRE/tests/mpi; then
-        export PATH=$LUSTRE/tests/mpi:$PATH
-    fi
-    export RSYNC_RSH=${RSYNC_RSH:-rsh}
-
-    export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
-    [ ! -f "$LCTL" ] && export LCTL=$(which lctl)
-    export LFS=${LFS:-"$LUSTRE/utils/lfs"}
-    [ ! -f "$LFS" ] && export LFS=$(which lfs)
-    SETSTRIPE=${SETSTRIPE:-"$LFS setstripe"}
-    GETSTRIPE=${GETSTRIPE:-"$LFS getstripe"}
-
-    export L_GETIDENTITY=${L_GETIDENTITY:-"$LUSTRE/utils/l_getidentity"}
-    if [ ! -f "$L_GETIDENTITY" ]; then
-        if `which l_getidentity > /dev/null 2>&1`; then
-            export L_GETIDENTITY=$(which l_getidentity)
-        else
-            export L_GETIDENTITY=NONE
-        fi
-    fi
-    export LL_DECODE_FILTER_FID=${LL_DECODE_FILTER_FID:-"$LUSTRE/utils/ll_decode_filter_fid"}
-    [ ! -f "$LL_DECODE_FILTER_FID" ] && export LL_DECODE_FILTER_FID="ll_decode_filter_fid"
-    export LL_DECODE_LINKEA=${LL_DECODE_LINKEA:-"$LUSTRE/utils/ll_decode_linkea"}
-    [ ! -f "$LL_DECODE_LINKEA" ] && export LL_DECODE_LINKEA="ll_decode_linkea"
-    export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"}
-    [ ! -f "$MKFS" ] && export MKFS="mkfs.lustre"
-    export TUNEFS=${TUNEFS:-"$LUSTRE/utils/tunefs.lustre"}
-    [ ! -f "$TUNEFS" ] && export TUNEFS="tunefs.lustre"
-    export CHECKSTAT="${CHECKSTAT:-"checkstat -v"} "
-    export LUSTRE_RMMOD=${LUSTRE_RMMOD:-$LUSTRE/scripts/lustre_rmmod}
-    [ ! -f "$LUSTRE_RMMOD" ] &&
-        export LUSTRE_RMMOD=$(which lustre_rmmod 2> /dev/null)
-    export LFS_MIGRATE=${LFS_MIGRATE:-$LUSTRE/scripts/lfs_migrate}
-    [ ! -f "$LFS_MIGRATE" ] &&
-        export LFS_MIGRATE=$(which lfs_migrate 2> /dev/null)
-    export LR_READER=${LR_READER:-"$LUSTRE/utils/lr_reader"}
-    [ ! -f "$LR_READER" ] && export LR_READER=$(which lr_reader 2> /dev/null)
-    [ -z "$LR_READER" ] && export LR_READER="/usr/sbin/lr_reader"
-    export NAME=${NAME:-local}
-    export LGSSD=${LGSSD:-"$LUSTRE/utils/gss/lgssd"}
-    [ "$GSS_PIPEFS" = "true" ] && [ ! -f "$LGSSD" ] && \
-        export LGSSD=$(which lgssd)
-    export LSVCGSSD=${LSVCGSSD:-"$LUSTRE/utils/gss/lsvcgssd"}
-    [ ! -f "$LSVCGSSD" ] && export LSVCGSSD=$(which lsvcgssd 2> /dev/null)
-    export KRB5DIR=${KRB5DIR:-"/usr/kerberos"}
-    export DIR2
-    export SAVE_PWD=${SAVE_PWD:-$LUSTRE/tests}
-    export AT_MAX_PATH
-    export LDEV=${LDEV:-"$LUSTRE/scripts/ldev"}
-    [ ! -f "$LDEV" ] && export LDEV=$(which ldev 2> /dev/null)
-
-    if [ "$ACCEPTOR_PORT" ]; then
-        export PORT_OPT="--port $ACCEPTOR_PORT"
-    fi
+       export MDSRATE=${MDSRATE:-"$LUSTRE/tests/mpi/mdsrate"}
+       [ ! -f "$MDSRATE" ] && export MDSRATE=$(which mdsrate 2> /dev/null)
+       if ! echo $PATH | grep -q $LUSTRE/tests/racer; then
+               export PATH=$LUSTRE/tests/racer:$PATH:
+       fi
+       if ! echo $PATH | grep -q $LUSTRE/tests/mpi; then
+               export PATH=$LUSTRE/tests/mpi:$PATH
+       fi
+       export RSYNC_RSH=${RSYNC_RSH:-rsh}
+
+       export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
+       [ ! -f "$LCTL" ] && export LCTL=$(which lctl)
+       export LFS=${LFS:-"$LUSTRE/utils/lfs"}
+       [ ! -f "$LFS" ] && export LFS=$(which lfs)
+       SETSTRIPE=${SETSTRIPE:-"$LFS setstripe"}
+       GETSTRIPE=${GETSTRIPE:-"$LFS getstripe"}
+
+       export L_GETIDENTITY=${L_GETIDENTITY:-"$LUSTRE/utils/l_getidentity"}
+       if [ ! -f "$L_GETIDENTITY" ]; then
+               if `which l_getidentity > /dev/null 2>&1`; then
+                       export L_GETIDENTITY=$(which l_getidentity)
+               else
+                       export L_GETIDENTITY=NONE
+               fi
+       fi
+       export LL_DECODE_FILTER_FID=${LL_DECODE_FILTER_FID:-"$LUSTRE/utils/ll_decode_filter_fid"}
+       [ ! -f "$LL_DECODE_FILTER_FID" ] && export LL_DECODE_FILTER_FID="ll_decode_filter_fid"
+       export LL_DECODE_LINKEA=${LL_DECODE_LINKEA:-"$LUSTRE/utils/ll_decode_linkea"}
+       [ ! -f "$LL_DECODE_LINKEA" ] && export LL_DECODE_LINKEA="ll_decode_linkea"
+       export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"}
+       [ ! -f "$MKFS" ] && export MKFS="mkfs.lustre"
+       export TUNEFS=${TUNEFS:-"$LUSTRE/utils/tunefs.lustre"}
+       [ ! -f "$TUNEFS" ] && export TUNEFS="tunefs.lustre"
+       export CHECKSTAT="${CHECKSTAT:-"checkstat -v"} "
+       export LUSTRE_RMMOD=${LUSTRE_RMMOD:-$LUSTRE/scripts/lustre_rmmod}
+       [ ! -f "$LUSTRE_RMMOD" ] &&
+               export LUSTRE_RMMOD=$(which lustre_rmmod 2> /dev/null)
+       export LUSTRE_ROUTES_CONVERSION=${LUSTRE_ROUTES_CONVERSION:-$LUSTRE/scripts/lustre_routes_conversion}
+       [ ! -f "$LUSTRE_ROUTES_CONVERSION" ] &&
+               export LUSTRE_ROUTES_CONVERSION=$(which lustre_routes_conversion 2> /dev/null)
+       export LFS_MIGRATE=${LFS_MIGRATE:-$LUSTRE/scripts/lfs_migrate}
+       [ ! -f "$LFS_MIGRATE" ] &&
+               export LFS_MIGRATE=$(which lfs_migrate 2> /dev/null)
+       export LR_READER=${LR_READER:-"$LUSTRE/utils/lr_reader"}
+       [ ! -f "$LR_READER" ] &&
+               export LR_READER=$(which lr_reader 2> /dev/null)
+       [ -z "$LR_READER" ] && export LR_READER="/usr/sbin/lr_reader"
+       export LSOM_SYNC=${LSOM_SYNC:-"$LUSTRE/utils/llsom_sync"}
+       [ ! -f "$LSOM_SYNC" ] &&
+               export LSOM_SYNC=$(which llsom_sync 2> /dev/null)
+       [ -z "$LSOM_SYNC" ] && export LSOM_SYNC="/usr/sbin/llsom_sync"
+       export NAME=${NAME:-local}
+       export LGSSD=${LGSSD:-"$LUSTRE/utils/gss/lgssd"}
+       [ "$GSS_PIPEFS" = "true" ] && [ ! -f "$LGSSD" ] &&
+               export LGSSD=$(which lgssd)
+       export LSVCGSSD=${LSVCGSSD:-"$LUSTRE/utils/gss/lsvcgssd"}
+       [ ! -f "$LSVCGSSD" ] && export LSVCGSSD=$(which lsvcgssd 2> /dev/null)
+       export KRB5DIR=${KRB5DIR:-"/usr/kerberos"}
+       export DIR2
+       export SAVE_PWD=${SAVE_PWD:-$LUSTRE/tests}
+       export AT_MAX_PATH
+       export LDEV=${LDEV:-"$LUSTRE/scripts/ldev"}
+       [ ! -f "$LDEV" ] && export LDEV=$(which ldev 2> /dev/null)
+
+       export DMSETUP=${DMSETUP:-dmsetup}
+       export DM_DEV_PATH=${DM_DEV_PATH:-/dev/mapper}
+       export LOSETUP=${LOSETUP:-losetup}
+
+       if [ "$ACCEPTOR_PORT" ]; then
+               export PORT_OPT="--port $ACCEPTOR_PORT"
+       fi
+
+       if $SHARED_KEY; then
+               $RPC_MODE || echo "Using GSS shared-key feature"
+               which lgss_sk > /dev/null 2>&1 ||
+                       error_exit "built with lgss_sk disabled! SEC=$SEC"
+               GSS=true
+               GSS_SK=true
+               SEC=$SK_FLAVOR
+       fi
+
+       case "x$SEC" in
+               xkrb5*)
+               $RPC_MODE || echo "Using GSS/krb5 ptlrpc security flavor"
+               which lgss_keyring > /dev/null 2>&1 ||
+                       error_exit "built with gss disabled! SEC=$SEC"
+               GSS=true
+               GSS_KRB5=true
+               ;;
+       esac
+
+       case "x$IDUP" in
+               xtrue)
+                       IDENTITY_UPCALL=true
+                       ;;
+               xfalse)
+                       IDENTITY_UPCALL=false
+                       ;;
+       esac
+
+       export LOAD_MODULES_REMOTE=${LOAD_MODULES_REMOTE:-false}
 
-    case "x$SEC" in
-        xkrb5*)
-            echo "Using GSS/krb5 ptlrpc security flavor"
-            which lgss_keyring > /dev/null 2>&1 || \
-                error_exit "built with gss disabled! SEC=$SEC"
-            GSS=true
-            GSS_KRB5=true
-            ;;
-    esac
-
-    case "x$IDUP" in
-        xtrue)
-            IDENTITY_UPCALL=true
-            ;;
-        xfalse)
-            IDENTITY_UPCALL=false
-            ;;
-    esac
-
-    export LOAD_MODULES_REMOTE=${LOAD_MODULES_REMOTE:-false}
-
-    # Paths on remote nodes, if different
-    export RLUSTRE=${RLUSTRE:-$LUSTRE}
-    export RPWD=${RPWD:-$PWD}
-    export I_MOUNTED=${I_MOUNTED:-"no"}
+       # Paths on remote nodes, if different
+       export RLUSTRE=${RLUSTRE:-$LUSTRE}
+       export RPWD=${RPWD:-$PWD}
+       export I_MOUNTED=${I_MOUNTED:-"no"}
+       export AUSTER_CLEANUP=${AUSTER_CLEANUP:-false}
        if [ ! -f /lib/modules/$(uname -r)/kernel/fs/lustre/mdt.ko -a \
             ! -f /lib/modules/$(uname -r)/updates/kernel/fs/lustre/mdt.ko -a \
             ! -f /lib/modules/$(uname -r)/extra/kernel/fs/lustre/mdt.ko -a \
@@ -391,28 +431,39 @@ export LINUX_VERSION_CODE=$(version_code ${LINUX_VERSION//\./ })
 #
 # All Lustre versions support "lctl get_param" to report the version of the
 # code running in the kernel (what our tests are interested in), but it
-# doesn't work without modules loaded.  If that fails, use "lctl version"
-# instead, which is easy to parse and works without the kernel modules,
-# but was only added in 2.6.50.  If that also fails, fall back to calling
-# "lctl lustre_build_version" which prints either (or both) the userspace
-# and kernel build versions, but is deprecated and should eventually be
-# removed.
+# doesn't work without modules loaded.  After 2.9.53 and in upstream kernels
+# the "version" parameter doesn't include "lustre: " at the beginning.
+# If that fails, call "lctl lustre_build_version" which prints either (or both)
+# the userspace and kernel build versions, but until 2.8.55 required root
+# access to get the Lustre kernel version.  If that also fails, fall back to
+# using "lctl --version", which is easy to parse and works without the kernel
+# modules, but was only added in 2.6.50 and only prints the lctl tool version,
+# not the module version, though they are usually the same.
+#
+# Various commands and their output format for different Lustre versions:
+# lctl get_param version:      2.9.55
+# lctl get_param version:      lustre: 2.8.53
+# lctl get_param version:      lustre: 2.6.52
+#                              kernel: patchless_client
+#                              build: v2_6_92_0-2.6.32-431.el6_lustre.x86_64
+# lctl lustre_build_version:   Lustre version: 2.8.53_27_gae67fc01
+# lctl lustre_build_version:   error: lustre_build_version: Permission denied
+#      (as non-root user)      lctl   version: v2_6_92_0-2.6.32-431.el6.x86_64
+# lctl lustre_build_version:   Lustre version: 2.5.3-2.6.32.26-175.fc12.x86_64
+#                              lctl   version: 2.5.3-2.6.32.26-175.fc12.x86_64
+# lctl --version:              lctl 2.6.50
 #
-# output: prints version string to stdout in dotted-decimal format
+# output: prints version string to stdout in (up to 4) dotted-decimal values
 lustre_build_version() {
        local facet=${1:-client}
+       local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null ||
+                               $LCTL lustre_build_version 2>/dev/null ||
+                               $LCTL --version 2>/dev/null | cut -d' ' -f2")
+       local lver=$(egrep -i "lustre: |version: " <<<"$ver" | head -n 1)
+       [ -n "$lver" ] && ver="$lver"
 
-       # lustre: 2.8.52
-       local VER=$(do_facet $facet $LCTL get_param -n version 2> /dev/null |
-                   awk '/lustre: / { print $2 }')
-       # lctl 2.6.50
-       [ -z "$VER" ] && VER=$(do_facet $facet $LCTL --version 2>/dev/null |
-                              awk '{ print $2 }')
-       # Lustre version: 2.5.3-gfcfd782-CHANGED-2.6.32.26-175.fc12.x86_64
-       # lctl   version: 2.5.3-gfcfd782-CHANGED-2.6.32.26-175.fc12.x86_64
-       [ -z "$VER" ] && VER=$(do_facet $facet $LCTL lustre_build_version |
-                              awk '/version:/ { print $3; exit; }')
-       sed -e 's/^v//' -e 's/-.*//' -e 's/_/./g' <<<$VER
+       sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' -e 's/_/./g' <<<$ver |
+               cut -d. -f1-4
 }
 
 # Report the Lustre numeric build version code for the supplied facet.
@@ -424,6 +475,33 @@ module_loaded () {
        /sbin/lsmod | grep -q "^\<$1\>"
 }
 
+PRLFS=false
+lustre_insmod() {
+       local module=$1
+       shift
+       local args="$@"
+       local msg
+       local rc=0
+
+       if ! $PRLFS; then
+               msg="$(insmod $module $args 2>&1)" && return 0 || rc=$?
+       fi
+
+       # parallels can't load modules directly from prlfs, use /tmp instead
+       if $PRLFS || [[ "$(stat -f -c%t $module)" == "7c7c6673" ]]; then
+               local target="$(mktemp)"
+
+               cp "$module" "$target"
+               insmod $target $args
+               rc=$?
+               [[ $rc == 0 ]] && PRLFS=true
+               rm -f $target
+       else
+               echo "$msg"
+       fi
+       return $rc
+}
+
 # Load a module on the system where this is running.
 #
 # usage: load_module module_name [module arguments for insmod/modprobe]
@@ -433,62 +511,91 @@ module_loaded () {
 # /etc/modprobe.conf, from /etc/modprobe.d/Lustre, or else none will be used.
 #
 load_module() {
-    local optvar
-    EXT=".ko"
-    module=$1
-    shift
-    BASE=$(basename $module $EXT)
-
-    module_loaded ${BASE} && return
-
-    # If no module arguments were passed, get them from $MODOPTS_<MODULE>,
-    # else from modprobe.conf
-    if [ $# -eq 0 ]; then
-        # $MODOPTS_<MODULE>; we could use associative arrays, but that's not in
-        # Bash until 4.x, so we resort to eval.
-        optvar="MODOPTS_$(basename $module | tr a-z A-Z)"
-        eval set -- \$$optvar
-        if [ $# -eq 0 -a -n "$MODPROBECONF" ]; then
-               # Nothing in $MODOPTS_<MODULE>; try modprobe.conf
-               local opt
-               opt=$(awk -v var="^options $BASE" '$0 ~ var \
-                       {gsub("'"options $BASE"'",""); print}' $MODPROBECONF)
-               set -- $(echo -n $opt)
-
-               # Ensure we have accept=all for lnet
-               if [ $(basename $module) = lnet ]; then
-                       # OK, this is a bit wordy...
-                       local arg accept_all_present=false
-
-                       for arg in "$@"; do
-                               [ "$arg" = accept=all ] && \
-                                       accept_all_present=true
-                       done
-                       $accept_all_present || set -- "$@" accept=all
+       local module=$1 # '../libcfs/libcfs/libcfs', 'obdclass/obdclass', ...
+       shift
+       local ext=".ko"
+       local base=$(basename $module $ext)
+       local path
+       local -A module_is_loaded_aa
+       local optvar
+       local mod
+
+       for mod in $(lsmod | awk '{ print $1; }'); do
+               module_is_loaded_aa[${mod//-/_}]=true
+       done
+
+       module_is_loaded() {
+               ${module_is_loaded_aa[${1//-/_}]:-false}
+       }
+
+       if module_is_loaded $base; then
+               return
+       fi
+
+       if [[ -f $LUSTRE/$module$ext ]]; then
+               path=$LUSTRE/$module$ext
+       elif [[ "$base" == lnet_selftest ]] &&
+            [[ -f $LUSTRE/../lnet/selftest/$base$ext ]]; then
+               path=$LUSTRE/../lnet/selftest/$base$ext
+       else
+               path=''
+       fi
+
+       if [[ -n "$path" ]]; then
+               # Try to load any non-Lustre modules that $module depends on.
+               for mod in $(modinfo --field=depends $path | tr ',' ' '); do
+                       if ! module_is_loaded $mod; then
+                               modprobe $mod
+                       fi
+               done
+       fi
+
+       # If no module arguments were passed then get them from
+       # $MODOPTS_<MODULE>, otherwise from modprobe.conf.
+       if [ $# -eq 0 ]; then
+               # $MODOPTS_<MODULE>; we could use associative arrays, but that's
+               # not in Bash until 4.x, so we resort to eval.
+               optvar="MODOPTS_$(basename $module | tr a-z A-Z)"
+               eval set -- \$$optvar
+               if [ $# -eq 0 -a -n "$MODPROBECONF" ]; then
+                       # Nothing in $MODOPTS_<MODULE>; try modprobe.conf
+                       local opt
+                       opt=$(awk -v var="^options $base" '$0 ~ var \
+                             {gsub("'"options $base"'",""); print}' \
+                               $MODPROBECONF)
+                       set -- $(echo -n $opt)
+
+                       # Ensure we have accept=all for lnet
+                       if [[ "$base" == lnet ]]; then
+                               # OK, this is a bit wordy...
+                               local arg accept_all_present=false
+
+                               for arg in "$@"; do
+                                       [[ "$arg" == accept=all ]] &&
+                                               accept_all_present=true
+                               done
+
+                               $accept_all_present || set -- "$@" accept=all
+                       fi
+
+                       export $optvar="$*"
                fi
-               export $optvar="$*"
-        fi
-    fi
+       fi
 
-    [ $# -gt 0 ] && echo "${module} options: '$*'"
+       [ $# -gt 0 ] && echo "${module} options: '$*'"
 
        # Note that insmod will ignore anything in modprobe.conf, which is why
-       # we're passing options on the command-line.
-       if [[ "$BASE" == "lnet_selftest" ]] &&
-               [[ -f ${LUSTRE}/../lnet/selftest/${module}${EXT} ]]; then
-               insmod ${LUSTRE}/../lnet/selftest/${module}${EXT}
-       elif [[ -f ${LUSTRE}/${module}${EXT} ]]; then
-               [[ "$BASE" != "ptlrpc_gss" ]] || modprobe sunrpc
-               insmod ${LUSTRE}/${module}${EXT} "$@"
-       else
-               # must be testing a "make install" or "rpm" installation
-               # note failed to load ptlrpc_gss is considered not fatal
-               if [[ "$BASE" == "ptlrpc_gss" ]]; then
-                       modprobe $BASE "$@" 2>/dev/null ||
-                               echo "gss/krb5 is not supported"
-               else
-                       modprobe $BASE "$@"
+       # we're passing options on the command-line. If $path does not exist
+       # then we must be testing a "make install" or "rpm" installation. Also
+       # note that failing to load ptlrpc_gss is not considered fatal.
+       if [[ -n "$path" ]]; then
+               lustre_insmod $path "$@"
+       elif [[ "$base" == ptlrpc_gss ]]; then
+               if ! modprobe $base "$@" 2>/dev/null; then
+                       echo "gss/krb5 is not supported"
                fi
+       else
+               modprobe $base "$@"
        fi
 }
 
@@ -536,44 +643,37 @@ load_modules_local() {
 
        set_default_debug
        load_module ../lnet/lnet/lnet
-       case $NETTYPE in
-       o2ib)
-               LNETLND="o2iblnd/ko2iblnd"
-               ;;
-       *)
-               ;;
-       esac
-    LNETLND=${LNETLND:-"socklnd/ksocklnd"}
-    load_module ../lnet/klnds/$LNETLND
-    load_module obdclass/obdclass
-    load_module ptlrpc/ptlrpc
-    load_module ptlrpc/gss/ptlrpc_gss
-    load_module fld/fld
-    load_module fid/fid
-    load_module lmv/lmv
-    load_module mdc/mdc
-    load_module osc/osc
-    load_module lov/lov
-    load_module mgc/mgc
-    load_module obdecho/obdecho
+
+       LNDPATH=${LNDPATH:-"../lnet/klnds"}
+       if [ -z "$LNETLND" ]; then
+               case $NETTYPE in
+               o2ib*)  LNETLND="o2iblnd/ko2iblnd" ;;
+               tcp*)   LNETLND="socklnd/ksocklnd" ;;
+               *)      local lnd="${NETTYPE%%[0-9]}lnd"
+                       [ -f "$LNDPATH/$lnd/k$lnd.ko" ] &&
+                               LNETLND="$lnd/k$lnd" ||
+                               LNETLND="socklnd/ksocklnd"
+               esac
+       fi
+       load_module ../lnet/klnds/$LNETLND
+       load_module obdclass/obdclass
+       load_module ptlrpc/ptlrpc
+       load_module ptlrpc/gss/ptlrpc_gss
+       load_module fld/fld
+       load_module fid/fid
+       load_module lmv/lmv
+       load_module osc/osc
+       load_module mdc/mdc
+       load_module lov/lov
+       load_module mgc/mgc
+       load_module obdecho/obdecho
        if ! client_only; then
-               SYMLIST=/proc/kallsyms
-               grep -q crc16 $SYMLIST ||
-                       { modprobe crc16 2>/dev/null || true; }
-               grep -q -w jbd2 $SYMLIST ||
-                       { modprobe jbd2 2>/dev/null || true; }
                load_module lfsck/lfsck
                [ "$LQUOTA" != "no" ] &&
                        load_module quota/lquota $LQUOTAOPTS
                if [[ $(node_fstypes $HOSTNAME) == *zfs* ]]; then
-                       modprobe zfs
                        load_module osd-zfs/osd_zfs
-               fi
-               if [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then
-                       grep -q exportfs_decode_fh $SYMLIST ||
-                               { modprobe exportfs 2> /dev/null || true; }
-                       grep -q -w mbcache $SYMLIST ||
-                               { modprobe mbcache 2>/dev/null || true; }
+               elif [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then
                        load_module ../ldiskfs/ldiskfs
                        load_module osd-ldiskfs/osd_ldiskfs
                fi
@@ -675,11 +775,14 @@ unload_modules() {
 
 fs_log_size() {
        local facet=${1:-$SINGLEMDS}
-       local fstype=$(facet_fstype $facet)
        local size=0
-       case $fstype in
+
+       case $(facet_fstype $facet) in
                ldiskfs) size=50;; # largest seen is 44, leave some headroom
-               zfs)     size=400;; # largest seen is 384
+               # grant_block_size is in bytes, allow at least 2x max blocksize
+               zfs)     size=$(lctl get_param osc.$FSNAME*.import |
+                               awk '/grant_block_size:/ {print $2/512; exit;}')
+                         ;;
        esac
 
        echo -n $size
@@ -727,88 +830,234 @@ send_sigint() {
     do_nodes $list "killall -2 $@ 2>/dev/null || true"
 }
 
-# start gss daemons on all nodes, or
-# "daemon" on "list" if set
+# start gss daemons on all nodes, or "daemon" on "nodes" if set
 start_gss_daemons() {
-    local list=$1
-    local daemon=$2
+       local nodes=$1
+       local daemon=$2
 
-    if [ "$list" ] && [ "$daemon" ] ; then
-        echo "Starting gss daemon on nodes: $list"
-        do_nodes $list "$daemon" || return 8
-        return 0
-    fi
+       if [ "$nodes" ] && [ "$daemon" ] ; then
+               echo "Starting gss daemon on nodes: $nodes"
+               do_nodes $nodes "$daemon" || return 8
+               return 0
+       fi
 
-    local list=$(comma_list $(mdts_nodes))
-    echo "Starting gss daemon on mds: $list"
-    do_nodes $list "$LSVCGSSD -v" || return 1
-    if $GSS_PIPEFS; then
-        do_nodes $list "$LGSSD -v" || return 2
-    fi
+       nodes=$(comma_list $(mdts_nodes))
+       echo "Starting gss daemon on mds: $nodes"
+       if $GSS_SK; then
+               # Start all versions, in case of switching
+               do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z" || return 1
+       else
+               do_nodes $nodes "$LSVCGSSD -v" || return 1
+       fi
+       if $GSS_PIPEFS; then
+               do_nodes $nodes "$LGSSD -v" || return 2
+       fi
 
-    list=$(comma_list $(osts_nodes))
-    echo "Starting gss daemon on ost: $list"
-    do_nodes $list "$LSVCGSSD -v" || return 3
-    # starting on clients
+       nodes=$(comma_list $(osts_nodes))
+       echo "Starting gss daemon on ost: $nodes"
+       if $GSS_SK; then
+               # Start all versions, in case of switching
+               do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z" || return 3
+       else
+               do_nodes $nodes "$LSVCGSSD -v" || return 3
+       fi
+       # starting on clients
 
-    local clients=${CLIENTS:-`hostname`}
-    if $GSS_PIPEFS; then
-        echo "Starting $LGSSD on clients $clients "
-        do_nodes $clients  "$LGSSD -v" || return 4
-    fi
+       local clients=${CLIENTS:-$HOSTNAME}
+       if $GSS_PIPEFS; then
+               echo "Starting $LGSSD on clients $clients "
+               do_nodes $clients  "$LGSSD -v" || return 4
+       fi
 
-    # wait daemons entering "stable" status
-    sleep 5
-
-    #
-    # check daemons are running
-    #
-    list=$(comma_list $(mdts_nodes) $(osts_nodes))
-    check_gss_daemon_nodes $list lsvcgssd || return 5
-    if $GSS_PIPEFS; then
-        list=$(comma_list $(mdts_nodes))
-        check_gss_daemon_nodes $list lgssd || return 6
-    fi
-    if $GSS_PIPEFS; then
-        check_gss_daemon_nodes $clients lgssd || return 7
-    fi
+       # wait for the daemons to enter "stable" status
+       sleep 5
+
+       #
+       # check daemons are running
+       #
+       nodes=$(comma_list $(mdts_nodes) $(osts_nodes))
+       check_gss_daemon_nodes $nodes lsvcgssd || return 5
+       if $GSS_PIPEFS; then
+               nodes=$(comma_list $(mdts_nodes))
+               check_gss_daemon_nodes $nodes lgssd || return 6
+       fi
+       if $GSS_PIPEFS; then
+               check_gss_daemon_nodes $clients lgssd || return 7
+       fi
 }
 
 stop_gss_daemons() {
-    local list=$(comma_list $(mdts_nodes))
+       local nodes=$(comma_list $(mdts_nodes))
 
-    send_sigint $list lsvcgssd lgssd
+       send_sigint $nodes lsvcgssd lgssd
 
-    list=$(comma_list $(osts_nodes))
-    send_sigint $list lsvcgssd
+       nodes=$(comma_list $(osts_nodes))
+       send_sigint $nodes lsvcgssd
 
-    list=${CLIENTS:-`hostname`}
-    send_sigint $list lgssd
+       nodes=${CLIENTS:-$HOSTNAME}
+       send_sigint $nodes lgssd
+}
+
+add_sk_mntflag() {
+       # Add mount flags for shared key
+       local mt_opts=$@
+       if grep -q skpath <<< "$mt_opts" ; then
+               mt_opts=$(echo $mt_opts |
+                       sed -e "s#skpath=[^ ,]*#skpath=$SK_PATH#")
+       else
+               if [ -z "$mt_opts" ]; then
+                       mt_opts="-o skpath=$SK_PATH"
+               else
+                       mt_opts="$mt_opts,skpath=$SK_PATH"
+               fi
+       fi
+       echo -n $mt_opts
 }
 
 init_gss() {
-    if $GSS; then
-        if ! module_loaded ptlrpc_gss; then
-            load_module ptlrpc/gss/ptlrpc_gss
-            module_loaded ptlrpc_gss ||
-                error_exit "init_gss : GSS=$GSS, but gss/krb5 is not supported!"
-        fi
-        if $GSS_KRB5; then
-                start_gss_daemons || error_exit "start gss daemon failed! rc=$?"
-        fi
+       if $SHARED_KEY; then
+               GSS=true
+               GSS_SK=true
+       fi
+
+       if ! $GSS; then
+               return
+       fi
+
+       if ! module_loaded ptlrpc_gss; then
+               load_module ptlrpc/gss/ptlrpc_gss
+               module_loaded ptlrpc_gss ||
+                       error_exit "init_gss: GSS=$GSS, but gss/krb5 missing"
+       fi
+
+       if $GSS_KRB5 || $GSS_SK; then
+               start_gss_daemons || error_exit "start gss daemon failed! rc=$?"
+       fi
+
+       if $GSS_SK && $SK_NO_KEY; then
+               local numclients=${1:-$CLIENTCOUNT}
+               local clients=${CLIENTS:-$HOSTNAME}
+
+               # security ctx config for keyring
+               SK_NO_KEY=false
+               mkdir -p $SK_OM_PATH
+               mount -o bind $SK_OM_PATH /etc/request-key.d/
+               local lgssc_conf_line='create lgssc * * '
+               lgssc_conf_line+=$(which lgss_keyring)
+               lgssc_conf_line+=' %o %k %t %d %c %u %g %T %P %S'
+
+               local lgssc_conf_file="/etc/request-key.d/lgssc.conf"
+               echo "$lgssc_conf_line" > $lgssc_conf_file
+               [ -e $lgssc_conf_file ] ||
+                       error_exit "Could not find key options in $lgssc_conf_file"
+
+               if ! local_mode; then
+                       do_nodes $(comma_list $(all_nodes)) "mkdir -p \
+                               $SK_OM_PATH"
+                       do_nodes $(comma_list $(all_nodes)) "mount \
+                               -o bind $SK_OM_PATH \
+                               /etc/request-key.d/"
+                       do_nodes $(comma_list $(all_nodes)) "rsync -aqv \
+                               $HOSTNAME:$lgssc_conf_file \
+                               $lgssc_conf_file >/dev/null 2>&1"
+               fi
 
-        if [ -n "$LGSS_KEYRING_DEBUG" ]; then
+               # create shared key on all nodes
+               mkdir -p $SK_PATH/nodemap
+               rm -f $SK_PATH/$FSNAME.key $SK_PATH/nodemap/c*.key \
+                       $SK_PATH/$FSNAME-*.key
+               # for nodemap testing each client may need its own key,
+               # and S2S now requires keys as well, both for "client"
+               # and for "server"
+               if $SK_S2S; then
+                       lgss_sk -t server -f$FSNAME -n $SK_S2SNMCLI \
+                               -w $SK_PATH/$FSNAME-nmclient.key \
+                               -d /dev/urandom >/dev/null 2>&1
+                       lgss_sk -t mgs,server -f$FSNAME -n $SK_S2SNM \
+                               -w $SK_PATH/$FSNAME-s2s-server.key \
+                               -d /dev/urandom >/dev/null 2>&1
+               fi
+               # basic key create
+               lgss_sk -t server -f$FSNAME -w $SK_PATH/$FSNAME.key \
+                       -d /dev/urandom >/dev/null 2>&1
+               # per-nodemap keys
+               for i in $(seq 0 $((numclients - 1))); do
+                       lgss_sk -t server -f$FSNAME -n c$i \
+                               -w $SK_PATH/nodemap/c$i.key -d /dev/urandom \
+                               >/dev/null 2>&1
+               done
+               # Distribute keys
+               if ! local_mode; then
+                       do_nodes $(comma_list $(all_nodes)) "rsync -av \
+                               $HOSTNAME:$SK_PATH/ $SK_PATH >/dev/null 2>&1"
+               fi
+               # Set client keys to client type to generate prime P
+               if local_mode; then
+                       do_nodes $(all_nodes) "lgss_sk -t client,server -m \
+                               $SK_PATH/$FSNAME.key >/dev/null 2>&1"
+               else
+                       do_nodes $clients "lgss_sk -t client -m \
+                               $SK_PATH/$FSNAME.key >/dev/null 2>&1"
+                       do_nodes $clients "find $SK_PATH/nodemap -name \*.key | \
+                               xargs -IX lgss_sk -t client -m X >/dev/null 2>&1"
+               fi
+               # This is required for servers as well, if S2S in use
+               if $SK_S2S; then
+                       do_nodes $(comma_list $(mdts_nodes)) \
+                               "cp $SK_PATH/$FSNAME-s2s-server.key \
+                               $SK_PATH/$FSNAME-s2s-client.key; lgss_sk \
+                               -t client -m $SK_PATH/$FSNAME-s2s-client.key \
+                               >/dev/null 2>&1"
+                       do_nodes $(comma_list $(osts_nodes)) \
+                               "cp $SK_PATH/$FSNAME-s2s-server.key \
+                               $SK_PATH/$FSNAME-s2s-client.key; lgss_sk \
+                               -t client -m $SK_PATH/$FSNAME-s2s-client.key \
+                               >/dev/null 2>&1"
+                       do_nodes $clients "lgss_sk -t client \
+                               -m $SK_PATH/$FSNAME-nmclient.key \
+                                >/dev/null 2>&1"
+               fi
+               # mount options for servers and clients
+               MGS_MOUNT_OPTS=$(add_sk_mntflag $MGS_MOUNT_OPTS)
+               MDS_MOUNT_OPTS=$(add_sk_mntflag $MDS_MOUNT_OPTS)
+               OST_MOUNT_OPTS=$(add_sk_mntflag $OST_MOUNT_OPTS)
+               MOUNT_OPTS=$(add_sk_mntflag $MOUNT_OPTS)
+               SEC=$SK_FLAVOR
+       fi
+
+       if [ -n "$LGSS_KEYRING_DEBUG" ]; then
                lctl set_param -n \
-                   sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG
-        fi
-    fi
+                       sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG
+       fi
 }
 
 cleanup_gss() {
-    if $GSS; then
-        stop_gss_daemons
-        # maybe cleanup credential cache?
-    fi
+       if $GSS; then
+               stop_gss_daemons
+               # maybe cleanup credential cache?
+       fi
+}
+
+cleanup_sk() {
+       if $GSS_SK; then
+               if $SK_S2S; then
+                       do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNM"
+                       do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNMCLI"
+                       $RPC_MODE || echo "Sleeping for 10 sec for Nodemap.."
+                       sleep 10
+               fi
+               stop_gss_daemons
+               $RPC_MODE || echo "Cleaning up Shared Key.."
+               do_nodes $(comma_list $(all_nodes)) "rm -f \
+                       $SK_PATH/$FSNAME*.key $SK_PATH/nodemap/$FSNAME*.key"
+               # Remove the mount and clean up the files we added to SK_PATH
+               do_nodes $(comma_list $(all_nodes)) "umount \
+                       /etc/request-key.d/"
+               do_nodes $(comma_list $(all_nodes)) "rm -f \
+                       $SK_OM_PATH/lgssc.conf"
+               do_nodes $(comma_list $(all_nodes)) "rmdir $SK_OM_PATH"
+               SK_NO_KEY=true
+       fi
 }
 
 facet_svc() {
@@ -1027,6 +1276,17 @@ zpool_name() {
 }
 
 #
+#
+# Get ZFS local fsname.
+#
+zfs_local_fsname() {
+       local facet=$1
+       local lfsname=$(basename $(facet_device $facet))
+
+       echo -n $lfsname
+}
+
+#
 # Create ZFS storage pool.
 #
 create_zpool() {
@@ -1036,7 +1296,7 @@ create_zpool() {
        shift 3
        local opts=${@:-"-o cachefile=none"}
 
-       do_facet $facet "modprobe zfs;
+       do_facet $facet "lsmod | grep zfs >&/dev/null || modprobe zfs;
                $ZPOOL list -H $poolname >/dev/null 2>&1 ||
                $ZPOOL create -f $opts $poolname $vdev"
 }
@@ -1096,20 +1356,34 @@ destroy_zpool() {
 import_zpool() {
        local facet=$1
        shift
-       local opts=${@:-"-o cachefile=none"}
+       local opts=${@:-"-o cachefile=none -o failmode=panic"}
        local poolname
 
        poolname=$(zpool_name $facet)
 
        if [[ -n "$poolname" ]]; then
                opts+=" -d $(dirname $(facet_vdevice $facet))"
-               do_facet $facet "modprobe zfs;
+               do_facet $facet "lsmod | grep zfs >&/dev/null || modprobe zfs;
                        $ZPOOL list -H $poolname >/dev/null 2>&1 ||
                        $ZPOOL import -f $opts $poolname"
        fi
 }
 
 #
+# Reimport ZFS storage pool with new name
+#
+reimport_zpool() {
+       local facet=$1
+       local newpool=$2
+       local opts="-o cachefile=none"
+       local poolname=$(zpool_name $facet)
+
+       opts+=" -d $(dirname $(facet_vdevice $facet))"
+       do_facet $facet "$ZPOOL export $poolname;
+                        $ZPOOL import $opts $poolname $newpool"
+}
+
+#
 # Set the "cachefile=none" property on ZFS storage pool so that the pool
 # is not automatically imported on system startup.
 #
@@ -1224,7 +1498,8 @@ mount_facets () {
                [ $RC -eq 0 ] && continue
 
                if [ "$TESTSUITE.$TESTNAME" = "replay-dual.test_0a" ]; then
-                       skip "Restart of $facet failed!." && touch $LU482_FAILED
+                       skip_noexit "Restart of $facet failed!." &&
+                               touch $LU482_FAILED
                else
                        error "Restart of $facet failed!"
                fi
@@ -1252,50 +1527,385 @@ csa_add() {
        echo -n "$opts"
 }
 
+#
+# Associate loop device with a given regular file.
+# Return the loop device.
+#
+setup_loop_device() {
+       local facet=$1
+       local file=$2
+
+       do_facet $facet "loop_dev=\\\$($LOSETUP -j $file | cut -d : -f 1);
+                        if [[ -z \\\$loop_dev ]]; then
+                               loop_dev=\\\$($LOSETUP -f);
+                               $LOSETUP \\\$loop_dev $file || loop_dev=;
+                        fi;
+                        echo -n \\\$loop_dev"
+}
+
+#
+# Detach a loop device.
+#
+cleanup_loop_device() {
+       local facet=$1
+       local loop_dev=$2
+
+       do_facet $facet "! $LOSETUP $loop_dev >/dev/null 2>&1 ||
+                        $LOSETUP -d $loop_dev"
+}
+
+#
+# Check if a given device is a block device.
+#
+is_blkdev() {
+       local facet=$1
+       local dev=$2
+       local size=${3:-""}
+
+       [[ -n "$dev" ]] || return 1
+       do_facet $facet "test -b $dev" || return 1
+       if [[ -n "$size" ]]; then
+               local in=$(do_facet $facet "dd if=$dev of=/dev/null bs=1k \
+                                           count=1 skip=$size 2>&1" |
+                                           awk '($3 == "in") { print $1 }')
+               [[ "$in" = "1+0" ]] || return 1
+       fi
+}
+
+#
+# Check if a given device is a device-mapper device.
+#
+is_dm_dev() {
+       local facet=$1
+       local dev=$2
+
+       [[ -n "$dev" ]] || return 1
+       do_facet $facet "$DMSETUP status $dev >/dev/null 2>&1"
+}
+
+#
+# Check if a given device is a device-mapper flakey device.
+#
+is_dm_flakey_dev() {
+       local facet=$1
+       local dev=$2
+       local type
+
+       [[ -n "$dev" ]] || return 1
+
+       type=$(do_facet $facet "$DMSETUP status $dev 2>&1" |
+              awk '{print $3}')
+       [[ $type = flakey ]] && return 0 || return 1
+}
+
+#
+# Check if device-mapper flakey device is supported by the kernel
+# of $facet node or not.
+#
+dm_flakey_supported() {
+       local facet=$1
+
+       $FLAKEY || return 1
+       do_facet $facet "modprobe dm-flakey;
+                        $DMSETUP targets | grep -q flakey" &> /dev/null
+}
+
+#
+# Get the device-mapper flakey device name of a given facet.
+#
+dm_facet_devname() {
+       local facet=$1
+       [[ $facet = mgs ]] && combined_mgs_mds && facet=mds1
+
+       echo -n ${facet}_flakey
+}
+
+#
+# Get the device-mapper flakey device of a given facet.
+# A device created by dmsetup will appear as /dev/mapper/<device-name>.
+#
+dm_facet_devpath() {
+       local facet=$1
+
+       echo -n $DM_DEV_PATH/$(dm_facet_devname $facet)
+}
+
+#
+# Set a device-mapper device with a new table.
+#
+# The table has the following format:
+# <logical_start_sector> <num_sectors> <target_type> <target_args>
+#
+# flakey <target_args> includes:
+# <destination_device> <offset> <up_interval> <down_interval> \
+# [<num_features> [<feature_arguments>]]
+#
+# linear <target_args> includes:
+# <destination_device> <start_sector>
+#
+dm_set_dev_table() {
+       local facet=$1
+       local dm_dev=$2
+       local target_type=$3
+       local num_sectors
+       local real_dev
+       local tmp
+       local table
+
+       read tmp num_sectors tmp real_dev tmp \
+               <<< $(do_facet $facet "$DMSETUP table $dm_dev")
+
+       case $target_type in
+       flakey)
+               table="0 $num_sectors flakey $real_dev 0 0 1800 1 drop_writes"
+               ;;
+       linear)
+               table="0 $num_sectors linear $real_dev 0"
+               ;;
+       *) error "invalid target type $target_type" ;;
+       esac
+
+       do_facet $facet "$DMSETUP suspend --nolockfs --noflush $dm_dev" ||
+               error "failed to suspend $dm_dev"
+       do_facet $facet "$DMSETUP load $dm_dev --table \\\"$table\\\"" ||
+               error "failed to load $target_type table into $dm_dev"
+       do_facet $facet "$DMSETUP resume $dm_dev" ||
+               error "failed to resume $dm_dev"
+}
+
+#
+# Set a device-mapper flakey device as "read-only" by using the "drop_writes"
+# feature parameter.
+#
+# drop_writes:
+#      All write I/O is silently ignored.
+#      Read I/O is handled correctly.
+#
+dm_set_dev_readonly() {
+       local facet=$1
+       local dm_dev=${2:-$(dm_facet_devpath $facet)}
+
+       dm_set_dev_table $facet $dm_dev flakey
+}
+
+#
+# Set a device-mapper device to traditional linear mapping mode.
+#
+dm_clear_dev_readonly() {
+       local facet=$1
+       local dm_dev=${2:-$(dm_facet_devpath $facet)}
+
+       dm_set_dev_table $facet $dm_dev linear
+}
+
+#
+# Set the device of a given facet as "read-only".
+#
+set_dev_readonly() {
+       local facet=$1
+       local svc=${facet}_svc
+
+       if [[ $(facet_fstype $facet) = zfs ]] ||
+          ! dm_flakey_supported $facet; then
+               do_facet $facet $LCTL --device ${!svc} readonly
+       else
+               dm_set_dev_readonly $facet
+       fi
+}
+
+#
+# Get size in 512-byte sectors (BLKGETSIZE64 / 512) of a given device.
+#
+get_num_sectors() {
+       local facet=$1
+       local dev=$2
+       local num_sectors
+
+       num_sectors=$(do_facet $facet "blockdev --getsz $dev 2>/dev/null")
+       [[ ${PIPESTATUS[0]} = 0 && -n "$num_sectors" ]] || num_sectors=0
+       echo -n $num_sectors
+}
+
+#
+# Create a device-mapper device on top of a given block device or regular file
+# (a regular file is first associated with a loop device).
+# Return the full path of the device-mapper device.
+#
+dm_create_dev() {
+       local facet=$1
+       local real_dev=$2                                  # destination device
+       local dm_dev_name=${3:-$(dm_facet_devname $facet)} # device name
+       local dm_dev=$DM_DEV_PATH/$dm_dev_name            # device-mapper device
+
+       # check if the device-mapper device to be created already exists
+       if is_dm_dev $facet $dm_dev; then
+               # if the existing device was set to "read-only", then clear it
+               ! is_dm_flakey_dev $facet $dm_dev ||
+                       dm_clear_dev_readonly $facet $dm_dev
+
+               echo -n $dm_dev
+               return 0
+       fi
+
+       # check if the destination device is a block device, and if not,
+       # associate it with a loop device
+       is_blkdev $facet $real_dev ||
+               real_dev=$(setup_loop_device $facet $real_dev)
+       [[ -n "$real_dev" ]] || { echo -n $real_dev; return 2; }
+
+       # now create the device-mapper device
+       local num_sectors=$(get_num_sectors $facet $real_dev)
+       local table="0 $num_sectors linear $real_dev 0"
+       local rc=0
+
+       do_facet $facet "$DMSETUP create $dm_dev_name --table \\\"$table\\\"" ||
+               { rc=${PIPESTATUS[0]}; dm_dev=; }
+       do_facet $facet "$DMSETUP mknodes >/dev/null 2>&1"
+
+       echo -n $dm_dev
+       return $rc
+}
+
+#
+# Map the facet name to its device variable name.
+#
+facet_device_alias() {
+       local facet=$1
+       local dev_alias=$facet
+
+       case $facet in
+               fs2mds) dev_alias=mds1_2 ;;
+               fs2ost) dev_alias=ost1_2 ;;
+               fs3ost) dev_alias=ost2_2 ;;
+               *) ;;
+       esac
+
+       echo -n $dev_alias
+}
+
+#
+# Save the original value of the facet device and export the new value.
+#
+export_dm_dev() {
+       local facet=$1
+       local dm_dev=$2
+
+       local active_facet=$(facet_active $facet)
+       local dev_alias=$(facet_device_alias $active_facet)
+       local dev_name=${dev_alias}_dev
+       local dev=${!dev_name}
+
+       if [[ $active_facet = $facet ]]; then
+               local failover_dev=${dev_alias}failover_dev
+               if [[ ${!failover_dev} = $dev ]]; then
+                       eval export ${failover_dev}_saved=$dev
+                       eval export ${failover_dev}=$dm_dev
+               fi
+       else
+               dev_alias=$(facet_device_alias $facet)
+               local facet_dev=${dev_alias}_dev
+               if [[ ${!facet_dev} = $dev ]]; then
+                       eval export ${facet_dev}_saved=$dev
+                       eval export ${facet_dev}=$dm_dev
+               fi
+       fi
+
+       eval export ${dev_name}_saved=$dev
+       eval export ${dev_name}=$dm_dev
+}
+
+#
+# Restore the saved value of the facet device.
+#
+unexport_dm_dev() {
+       local facet=$1
+
+       [[ $facet = mgs ]] && combined_mgs_mds && facet=mds1
+       local dev_alias=$(facet_device_alias $facet)
+
+       local saved_dev=${dev_alias}_dev_saved
+       [[ -z ${!saved_dev} ]] ||
+               eval export ${dev_alias}_dev=${!saved_dev}
+
+       saved_dev=${dev_alias}failover_dev_saved
+       [[ -z ${!saved_dev} ]] ||
+               eval export ${dev_alias}failover_dev=${!saved_dev}
+}
+
+#
+# Remove a device-mapper device.
+# If the destination device is a loop device, then also detach it.
+#
+dm_cleanup_dev() {
+       local facet=$1
+       local dm_dev=${2:-$(dm_facet_devpath $facet)}
+       local major
+       local minor
+
+       is_dm_dev $facet $dm_dev || return 0
+
+       read major minor <<< $(do_facet $facet "$DMSETUP table $dm_dev" |
+               awk '{ print $4 }' | awk -F: '{ print $1" "$2 }')
+
+       do_facet $facet "$DMSETUP remove $dm_dev"
+       do_facet $facet "$DMSETUP mknodes >/dev/null 2>&1"
+
+       unexport_dm_dev $facet
+
+       # detach a loop device
+       [[ $major -ne 7 ]] || cleanup_loop_device $facet /dev/loop$minor
+
+       # unload dm-flakey module
+       do_facet $facet "modprobe -r dm-flakey" || true
+}
+
 mount_facet() {
        local facet=$1
        shift
-       local dev=$(facet_active $facet)_dev
+       local active_facet=$(facet_active $facet)
+       local dev_alias=$(facet_device_alias $active_facet)
+       local dev=${dev_alias}_dev
        local opt=${facet}_opt
        local mntpt=$(facet_mntpt $facet)
        local opts="${!opt} $@"
        local fstype=$(facet_fstype $facet)
        local devicelabel
+       local dm_dev=${!dev}
 
        module_loaded lustre || load_modules
 
-       if [ $(facet_fstype $facet) == ldiskfs ] &&
-          ! do_facet $facet test -b ${!dev}; then
-               opts=$(csa_add "$opts" -o loop)
-       fi
+       case $fstype in
+       ldiskfs)
+               if dm_flakey_supported $facet; then
+                       dm_dev=$(dm_create_dev $facet ${!dev})
+                       [[ -n "$dm_dev" ]] || dm_dev=${!dev}
+               fi
 
-       if [[ $(facet_fstype $facet) == zfs ]]; then
+               is_blkdev $facet $dm_dev || opts=$(csa_add "$opts" -o loop)
+
+               devicelabel=$(do_facet ${facet} "$E2LABEL $dm_dev");;
+       zfs)
                # import ZFS storage pool
                import_zpool $facet || return ${PIPESTATUS[0]}
-       fi
 
-       case $fstype in
-       ldiskfs)
-               devicelabel=$(do_facet ${facet} "$E2LABEL ${!dev}");;
-       zfs)
                devicelabel=$(do_facet ${facet} "$ZFS get -H -o value \
-                                               lustre:svname ${!dev}");;
+                                               lustre:svname $dm_dev");;
        *)
                error "unknown fstype!";;
        esac
 
-       echo "Starting ${facet}: $opts ${!dev} $mntpt"
+       echo "Starting ${facet}: $opts $dm_dev $mntpt"
        # for testing LU-482 error handling in mount_facets() and test_0a()
        if [ -f $TMP/test-lu482-trigger ]; then
                RC=2
        else
-               do_facet ${facet} "mkdir -p $mntpt; $MOUNT_CMD $opts \
-                                  ${!dev} $mntpt"
+               do_facet ${facet} \
+                       "mkdir -p $mntpt; $MOUNT_CMD $opts $dm_dev $mntpt"
                RC=${PIPESTATUS[0]}
        fi
 
        if [ $RC -ne 0 ]; then
-               echo "Start of ${!dev} on ${facet} failed ${RC}"
+               echo "Start of $dm_dev on ${facet} failed ${RC}"
                return $RC
        fi
 
@@ -1312,19 +1922,19 @@ mount_facet() {
        fi
 
        if [[ $opts =~ .*nosvc.* ]]; then
-               echo "Start ${!dev} without service"
+               echo "Start $dm_dev without service"
        else
 
                case $fstype in
                ldiskfs)
-                       wait_update_facet ${facet} "$E2LABEL ${!dev} \
+                       wait_update_facet ${facet} "$E2LABEL $dm_dev \
                                2>/dev/null | grep -E ':[a-zA-Z]{3}[0-9]{4}'" \
-                               "" || error "${!dev} failed to initialize!";;
+                               "" || error "$dm_dev failed to initialize!";;
                zfs)
                        wait_update_facet ${facet} "$ZFS get -H -o value \
-                               lustre:svname ${!dev} 2>/dev/null | \
+                               lustre:svname $dm_dev 2>/dev/null | \
                                grep -E ':[a-zA-Z]{3}[0-9]{4}'" "" ||
-                               error "${!dev} failed to initialize!";;
+                               error "$dm_dev failed to initialize!";;
 
                *)
                        error "unknown fstype!";;
@@ -1334,16 +1944,17 @@ mount_facet() {
        # commit the device label change to disk
        if [[ $devicelabel =~ (:[a-zA-Z]{3}[0-9]{4}) ]]; then
                echo "Commit the device label on ${!dev}"
-               do_facet $facet "sync; sync; sync"
-               sleep 5
+               do_facet $facet "sync; sleep 1; sync"
        fi
 
 
-       label=$(devicelabel ${facet} ${!dev})
-       [ -z "$label" ] && echo no label for ${!dev} && exit 1
+       label=$(devicelabel ${facet} $dm_dev)
+       [ -z "$label" ] && echo no label for $dm_dev && exit 1
        eval export ${facet}_svc=${label}
        echo Started ${label}
 
+       export_dm_dev $facet $dm_dev
+
        return $RC
 }
 
@@ -1353,14 +1964,16 @@ start() {
        shift
        local device=$1
        shift
-       eval export ${facet}_dev=${device}
+       local dev_alias=$(facet_device_alias $facet)
+
+       eval export ${dev_alias}_dev=${device}
        eval export ${facet}_opt=\"$@\"
 
-       local varname=${facet}failover_dev
+       local varname=${dev_alias}failover_dev
        if [ -n "${!varname}" ] ; then
-               eval export ${facet}failover_dev=${!varname}
+               eval export ${dev_alias}failover_dev=${!varname}
        else
-               eval export ${facet}failover_dev=$device
+               eval export ${dev_alias}failover_dev=$device
        fi
 
        local mntpt=$(facet_mntpt $facet)
@@ -1379,18 +1992,18 @@ start() {
 }
 
 stop() {
-    local running
-    local facet=$1
-    shift
-    local HOST=`facet_active_host $facet`
-    [ -z $HOST ] && echo stop: no host for $facet && return 0
-
-    local mntpt=$(facet_mntpt $facet)
-    running=$(do_facet ${facet} "grep -c $mntpt' ' /proc/mounts") || true
-    if [ ${running} -ne 0 ]; then
-        echo "Stopping $mntpt (opts:$@) on $HOST"
-       do_facet ${facet} $UMOUNT $@ $mntpt
-    fi
+       local running
+       local facet=$1
+       shift
+       local HOST=$(facet_active_host $facet)
+       [[ -z $HOST ]] && echo stop: no host for $facet && return 0
+
+       local mntpt=$(facet_mntpt $facet)
+       running=$(do_facet ${facet} "grep -c $mntpt' ' /proc/mounts || true")
+       if [ ${running} -ne 0 ]; then
+               echo "Stopping $mntpt (opts:$@) on $HOST"
+               do_facet ${facet} $UMOUNT $@ $mntpt
+       fi
 
        # umount should block, but we should wait for unrelated obd's
        # like the MGS or MGC to also stop.
@@ -1398,7 +2011,14 @@ stop() {
 
        if [[ $(facet_fstype $facet) == zfs ]]; then
                # export ZFS storage pool
-               export_zpool $facet
+               [ "$KEEP_ZPOOL" = "true" ] || export_zpool $facet
+       elif dm_flakey_supported $facet; then
+               local host=${facet}_HOST
+               local failover_host=${facet}failover_HOST
+               if [[ -n ${!failover_host} && ${!failover_host} != ${!host} ]]||
+                       $CLEANUP_DM_DEV || [[ $facet = fs* ]]; then
+                       dm_cleanup_dev $facet
+               fi
        fi
 }
 
@@ -1493,6 +2113,18 @@ mdt_free_inodes() {
        echo $free_inodes
 }
 
+#
+# Get the OST device status from 'lfs df' with a given OST index.
+#
+ost_dev_status() {
+       local ost_idx=$1
+       local mnt_pnt=${2:-$MOUNT}
+       local ost_uuid
+
+       ost_uuid=$(ostuuid_from_index $ost_idx $mnt_pnt)
+       lfs_df $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }'
+}
+
 setup_quota(){
        local mntpt=$1
 
@@ -1568,7 +2200,20 @@ zconf_mount() {
                do_node $client "! grep -q $mnt' ' /proc/mounts ||
                        umount $mnt"
        fi
-       do_node $client $MOUNT_CMD $flags $opts $device $mnt || return 1
+       if $GSS_SK && ($SK_UNIQUE_NM || $SK_S2S); then
+               # Mount using nodemap key
+               local mountkey=$SK_PATH/$FSNAME-nmclient.key
+               if $SK_UNIQUE_NM; then
+                       mountkey=$SK_PATH/nodemap/c0.key
+               fi
+               local prunedopts=$(echo $opts |
+                               sed -e "s#skpath=[^,^ ]*#skpath=$mountkey#g")
+               do_node $client $MOUNT_CMD $flags $prunedopts $device $mnt ||
+                               return 1
+       else
+               do_node $client $MOUNT_CMD $flags $opts $device $mnt ||
+                               return 1
+       fi
 
        set_default_debug_nodes $client
 
@@ -1602,6 +2247,19 @@ zconf_umount() {
     fi
 }
 
+# Mount the file system on the MGS
+mount_mgs_client() {
+       do_facet mgs "mkdir -p $MOUNT"
+       zconf_mount $mgs_HOST $MOUNT $MOUNT_OPTS ||
+               error "unable to mount $MOUNT on MGS"
+}
+
+# Unmount the file system on the MGS
+umount_mgs_client() {
+       zconf_umount $mgs_HOST $MOUNT
+       do_facet mgs "rm -rf $MOUNT"
+}
+
 # nodes is comma list
 sanity_mount_check_nodes () {
     local nodes=$1
@@ -1667,7 +2325,6 @@ zconf_mount_clients() {
        local opts=${3:-$MOUNT_OPTS}
        opts=${opts:+-o $opts}
        local flags=${4:-$MOUNT_FLAGS}
-
        local device=$MGSNID:/$FSNAME$FILESET
        if [ -z "$mnt" -o -z "$FSNAME" ]; then
                echo "Bad conf mount command: opt=$flags $opts dev=$device " \
@@ -1677,10 +2334,46 @@ zconf_mount_clients() {
 
        echo "Starting client $clients: $flags $opts $device $mnt"
        if [ -n "$FILESET" -a ! -n "$SKIP_FILESET" ]; then
-               do_nodes $clients "! grep -q $mnt' ' /proc/mounts ||
-                       umount $mnt"
-               do_nodes $clients $MOUNT_CMD $flags $opts $MGSNID:/$FSNAME \
-                       $mnt || return 1
+               if $GSS_SK && ($SK_UNIQUE_NM || $SK_S2S); then
+                       # Mount with own nodemap key
+                       local i=0
+                       # Mount all server nodes first with per-NM keys
+                       for nmclient in ${clients//,/ }; do
+#                              do_nodes $(comma_list $(all_server_nodes)) "lgss_sk -t server -l $SK_PATH/nodemap/c$i.key -n c$i"
+                               do_nodes $(comma_list $(all_server_nodes)) "lgss_sk -t server -l $SK_PATH/nodemap/c$i.key"
+                               i=$((i + 1))
+                       done
+                       # set perms for per-nodemap keys else permission denied
+                       do_nodes $(comma_list $(all_nodes)) \
+                               "keyctl show | grep lustre | cut -c1-11 |
+                               sed -e 's/ //g;' |
+                               xargs -IX keyctl setperm X 0x3f3f3f3f"
+                       local mountkey=$SK_PATH/$FSNAME-nmclient.key
+                       i=0
+                       for nmclient in ${clients//,/ }; do
+                               if $SK_UNIQUE_NM; then
+                                       mountkey=$SK_PATH/nodemap/c$i.key
+                               fi
+                               do_node $nmclient "! grep -q $mnt' ' \
+                                       /proc/mounts || umount $mnt"
+                               local prunedopts=$(add_sk_mntflag $prunedopts);
+                               prunedopts=$(echo $prunedopts | sed -e \
+                                       "s#skpath=[^ ^,]*#skpath=$mountkey#g")
+                               set -x
+                               do_nodes $(comma_list $(all_server_nodes)) \
+                                       "keyctl show"
+                               set +x
+                               do_node $nmclient $MOUNT_CMD $flags \
+                                       $prunedopts $MGSNID:/$FSNAME $mnt ||
+                                       return 1
+                               i=$((i + 1))
+                       done
+               else
+                       do_nodes $clients "! grep -q $mnt' ' /proc/mounts ||
+                                       umount $mnt"
+                       do_nodes $clients $MOUNT_CMD $flags $opts \
+                                       $MGSNID:/$FSNAME $mnt || return 1
+               fi
                #disable FILESET if not supported
                do_nodes $clients lctl get_param -n \
                        mdc.$FSNAME-MDT0000*.import | grep -q subtree ||
@@ -1690,15 +2383,56 @@ zconf_mount_clients() {
                        umount $mnt"
        fi
 
-       do_nodes $clients "
+       if $GSS_SK && ($SK_UNIQUE_NM || $SK_S2S); then
+               # Mount with nodemap key
+               local i=0
+               local mountkey=$SK_PATH/$FSNAME-nmclient.key
+               for nmclient in ${clients//,/ }; do
+                       if $SK_UNIQUE_NM; then
+                               mountkey=$SK_PATH/nodemap/c$i.key
+                       fi
+                       local prunedopts=$(echo $opts | sed -e \
+                               "s#skpath=[^ ^,]*#skpath=$mountkey#g");
+                       do_node $nmclient "! grep -q $mnt' ' /proc/mounts ||
+                               umount $mnt"
+                       do_node $nmclient "
+               running=\\\$(mount | grep -c $mnt' ');
+               rc=0;
+               if [ \\\$running -eq 0 ] ; then
+                       mkdir -p $mnt;
+                       $MOUNT_CMD $flags $prunedopts $device $mnt;
+                       rc=\\\$?;
+               else
+                       lustre_mnt_count=\\\$(mount | grep $mnt' ' | \
+                               grep 'type lustre' | wc -l);
+                       if [ \\\$running -ne \\\$lustre_mnt_count ] ; then
+                               echo zconf_mount_clients FAILED: \
+                                       mount count \\\$running, not matching \
+                                       with mount count of 'type lustre' \
+                                       \\\$lustre_mnt_count;
+                               rc=1;
+                       fi;
+               fi;
+       exit \\\$rc" || return ${PIPESTATUS[0]}
+
+                       i=$((i + 1))
+               done
+       else
+
+               local tmpopts=$opts
+               if $SHARED_KEY; then
+                       tmpopts=$(add_sk_mntflag $opts)
+               fi
+               do_nodes $clients "
 running=\\\$(mount | grep -c $mnt' ');
 rc=0;
 if [ \\\$running -eq 0 ] ; then
-    mkdir -p $mnt;
-    $MOUNT_CMD $flags $opts $device $mnt;
-    rc=\\\$?;
+       mkdir -p $mnt;
+       $MOUNT_CMD $flags $tmpopts $device $mnt;
+       rc=\\\$?;
 fi;
 exit \\\$rc" || return ${PIPESTATUS[0]}
+       fi
 
        echo "Started clients $clients: "
        do_nodes $clients "mount | grep $mnt' '"
@@ -1803,13 +2537,23 @@ facets_up_on_host () {
 }
 
 shutdown_facet() {
-    local facet=$1
+       local facet=$1
+       local affected_facet
+       local affected_facets
+
+       if [[ "$FAILURE_MODE" = HARD ]]; then
+               if [[ $(facet_fstype $facet) = ldiskfs ]] &&
+                       dm_flakey_supported $facet; then
+                       affected_facets=$(affected_facets $facet)
+                       for affected_facet in ${affected_facets//,/ }; do
+                               unexport_dm_dev $affected_facet
+                       done
+               fi
 
-    if [ "$FAILURE_MODE" = HARD ]; then
-        shutdown_node_hard $(facet_active_host $facet)
-    else
-        stop $facet
-    fi
+               shutdown_node_hard $(facet_active_host $facet)
+       else
+               stop $facet
+       fi
 }
 
 reboot_node() {
@@ -1879,29 +2623,31 @@ node_var_name() {
 }
 
 start_client_load() {
-    local client=$1
-    local load=$2
-    local var=$(node_var_name $client)_load
-    eval export ${var}=$load
-
-    do_node $client "PATH=$PATH MOUNT=$MOUNT ERRORS_OK=$ERRORS_OK \
-BREAK_ON_ERROR=$BREAK_ON_ERROR \
-END_RUN_FILE=$END_RUN_FILE \
-LOAD_PID_FILE=$LOAD_PID_FILE \
-TESTLOG_PREFIX=$TESTLOG_PREFIX \
-TESTNAME=$TESTNAME \
-DBENCH_LIB=$DBENCH_LIB \
-DBENCH_SRC=$DBENCH_SRC \
-CLIENT_COUNT=$((CLIENTCOUNT - 1)) \
-LFS=$LFS \
-run_${load}.sh" &
-    local ppid=$!
-    log "Started client load: ${load} on $client"
-
-    # get the children process IDs
-    local pids=$(ps --ppid $ppid -o pid= | xargs)
-    CLIENT_LOAD_PIDS="$CLIENT_LOAD_PIDS $ppid $pids"
-    return 0
+       local client=$1
+       local load=$2
+       local var=$(node_var_name $client)_load
+       eval export ${var}=$load
+
+       do_node $client "PATH=$PATH MOUNT=$MOUNT ERRORS_OK=$ERRORS_OK \
+                       BREAK_ON_ERROR=$BREAK_ON_ERROR \
+                       END_RUN_FILE=$END_RUN_FILE \
+                       LOAD_PID_FILE=$LOAD_PID_FILE \
+                       TESTLOG_PREFIX=$TESTLOG_PREFIX \
+                       TESTNAME=$TESTNAME \
+                       DBENCH_LIB=$DBENCH_LIB \
+                       DBENCH_SRC=$DBENCH_SRC \
+                       CLIENT_COUNT=$((CLIENTCOUNT - 1)) \
+                       LFS=$LFS \
+                       LCTL=$LCTL \
+                       FSNAME=$FSNAME \
+                       run_${load}.sh" &
+       local ppid=$!
+       log "Started client load: ${load} on $client"
+
+       # get the children process IDs
+       local pids=$(ps --ppid $ppid -o pid= | xargs)
+       CLIENT_LOAD_PIDS="$CLIENT_LOAD_PIDS $ppid $pids"
+       return 0
 }
 
 start_client_loads () {
@@ -2161,25 +2907,25 @@ wait_update_facet() {
 
 sync_all_data() {
        do_nodes $(comma_list $(mdts_nodes)) \
-           "lctl set_param -n osd*.*MDT*.force_sync=1"
+           "lctl set_param -n os[cd]*.*MDT*.force_sync=1"
        do_nodes $(comma_list $(osts_nodes)) \
            "lctl set_param -n osd*.*OS*.force_sync=1" 2>&1 |
                grep -v 'Found no match'
 }
 
 wait_zfs_commit() {
+       local zfs_wait=${2:-5}
+
        # the occupied disk space will be released
-       # only after DMUs are committed
+       # only after TXGs are committed
        if [[ $(facet_fstype $1) == zfs ]]; then
-               echo "sleep $2 for ZFS OSD"
-               sleep $2
+               echo "sleep $zfs_wait for ZFS $(facet_fstype $1)"
+               sleep $zfs_wait
        fi
 }
 
 wait_delete_completed_mds() {
-       local MAX_WAIT=${1:-20}
-       # for ZFS, waiting more time for DMUs to be committed
-       local ZFS_WAIT=${2:-5}
+       local max_wait=${1:-20}
        local mds2sync=""
        local stime=$(date +%s)
        local etime
@@ -2196,34 +2942,39 @@ wait_delete_completed_mds() {
                mds2sync="$mds2sync $node"
        done
        if [ -z "$mds2sync" ]; then
-               wait_zfs_commit $SINGLEMDS $ZFS_WAIT
-               return
+               wait_zfs_commit $SINGLEMDS
+               return 0
        fi
        mds2sync=$(comma_list $mds2sync)
 
        # sync MDS transactions
-       do_nodes $mds2sync "$LCTL set_param -n osd*.*MD*.force_sync 1"
+       do_nodes $mds2sync "$LCTL set_param -n os[cd]*.*MD*.force_sync 1"
 
        # wait till all changes are sent and commmitted by OSTs
        # for ldiskfs space is released upon execution, but DMU
        # do this upon commit
 
        local WAIT=0
-       while [[ $WAIT -ne $MAX_WAIT ]]; do
+       while [[ $WAIT -ne $max_wait ]]; do
                changes=$(do_nodes $mds2sync \
                        "$LCTL get_param -n osc.*MDT*.sync_*" | calc_sum)
                #echo "$node: $changes changes on all"
                if [[ $changes -eq 0 ]]; then
-                       wait_zfs_commit $SINGLEMDS $ZFS_WAIT
-                       return
+                       wait_zfs_commit $SINGLEMDS
+
+                       # the occupied disk space will be released
+                       # only after TXGs are committed
+                       wait_zfs_commit ost1
+                       return 0
                fi
                sleep 1
-               WAIT=$(( WAIT + 1))
+               WAIT=$((WAIT + 1))
        done
 
        etime=$(date +%s)
        echo "Delete is not completed in $((etime - stime)) seconds"
        do_nodes $mds2sync "$LCTL get_param osc.*MDT*.sync_*"
+       return 1
 }
 
 wait_for_host() {
@@ -2331,6 +3082,7 @@ wait_mds_ost_sync () {
        done
 
        # show which nodes are not finished.
+       cmd=$(echo $cmd | sed 's/-n//')
        do_nodes $list "$cmd"
        echo "$facet recovery node $i not done in $WAIT_TIMEOUT sec. $STATUS"
        return 1
@@ -2343,10 +3095,10 @@ wait_osts_up() {
        wait_update $HOSTNAME "eval $cmd" $OSTCOUNT ||
                error "wait_update OSTs up on client failed"
 
-       cmd="$LCTL get_param -n lod.$FSNAME-MDT*-*.target_obd | sort -u |
-            awk 'BEGIN {c = 0} /ACTIVE/{c += 1} END {printf \\\"%d\\\", c}'"
+       cmd="$LCTL get_param osp.$FSNAME-OST*-MDT0000.prealloc_last_id |
+            awk '/=[1-9][0-9]/ { c += 1 } END { printf \\\"%d\\\", c }'"
        wait_update_facet $SINGLEMDS "eval $cmd" $OSTCOUNT ||
-               error "wait_update OSTs up on MDT failed"
+               error "wait_update OSTs up on MDT0000 failed"
 }
 
 wait_destroy_complete () {
@@ -2378,7 +3130,7 @@ wait_destroy_complete () {
 
 wait_delete_completed() {
        wait_delete_completed_mds $1 || return $?
-       wait_destroy_complete
+       wait_destroy_complete || return $?
 }
 
 wait_exit_ST () {
@@ -2389,7 +3141,8 @@ wait_exit_ST () {
     local running
     # conf-sanity 31 takes a long time cleanup
     while [ $WAIT -lt 300 ]; do
-        running=$(do_facet ${facet} "lsmod | grep lnet > /dev/null && lctl dl | grep ' ST '") || true
+       running=$(do_facet ${facet} "lsmod | grep lnet > /dev/null &&
+lctl dl | grep ' ST ' || true")
         [ -z "${running}" ] && return 0
         echo "waited $WAIT for${running}"
         [ $INTERVAL -lt 64 ] && INTERVAL=$((INTERVAL + INTERVAL))
@@ -2444,7 +3197,6 @@ lfs_df_check() {
        fi
 }
 
-
 clients_up() {
        # not every config has many clients
        sleep 1
@@ -2592,7 +3344,7 @@ replay_barrier() {
        # handled by stop() and mount_facet() separately, which are used
        # inside fail() and fail_abort().
        #
-       do_facet $facet $LCTL --device ${!svc} readonly
+       set_dev_readonly $facet
        do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
        $LCTL mark "local REPLAY BARRIER on ${!svc}"
 }
@@ -2603,7 +3355,7 @@ replay_barrier_nodf() {
        local svc=${facet}_svc
        echo Replay barrier on ${!svc}
        do_facet $facet $LCTL --device ${!svc} notransno
-       do_facet $facet $LCTL --device ${!svc} readonly
+       set_dev_readonly $facet
        do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
        $LCTL mark "local REPLAY BARRIER on ${!svc}"
 }
@@ -2613,7 +3365,7 @@ replay_barrier_nosync() {
        local svc=${facet}_svc
        echo Replay barrier on ${!svc}
        do_facet $facet $LCTL --device ${!svc} notransno
-       do_facet $facet $LCTL --device ${!svc} readonly
+       set_dev_readonly $facet
        do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
        $LCTL mark "local REPLAY BARRIER on ${!svc}"
 }
@@ -2651,7 +3403,9 @@ fail() {
        local clients=${CLIENTS:-$HOSTNAME}
 
        facet_failover $* || error "failover: $?"
-       wait_clients_import_state "$clients" "$facets" FULL
+       # to initiate all OSC idling connections
+       clients_up
+       wait_clients_import_state "$clients" "$facets" "\(FULL\|IDLE\)"
        clients_up || error "post-failover stat: $?"
 }
 
@@ -2670,21 +3424,11 @@ fail_abort() {
        clients_up || error "post-failover stat: $?"
 }
 
-do_lmc() {
-    echo There is no lmc.  This is mountconf, baby.
-    exit 1
-}
-
 host_nids_address() {
-    local nodes=$1
-    local kind=$2
+       local nodes=$1
+       local net=${2:-"."}
 
-    if [ -n "$kind" ]; then
-        nids=$(do_nodes $nodes "$LCTL list_nids | grep $kind | cut -f 1 -d '@'")
-    else
-        nids=$(do_nodes $nodes "$LCTL list_nids all | cut -f 1 -d '@'")
-    fi
-    echo $nids
+       do_nodes $nodes "$LCTL list_nids | grep $net | cut -f 1 -d @"
 }
 
 h2name_or_ip() {
@@ -2693,40 +3437,34 @@ h2name_or_ip() {
        fi
 }
 
-h2ptl() {
-       if [ "$1" = "'*'" ]; then echo \'*\'; else
-               ID=`xtprocadmin -n $1 2>/dev/null | egrep -v 'NID' | \
-                                                       awk '{print $1}'`
-               if [ -z "$ID" ]; then
-                       echo "Could not get a ptl id for $1..."
-                       exit 1
-               fi
-               echo $ID"@ptl"
+h2nettype() {
+       if [[ -n "$NETTYPE" ]]; then
+               h2name_or_ip "$1" "$NETTYPE"
+       else
+               h2name_or_ip "$1" "$2"
        fi
 }
-declare -fx h2ptl
+declare -fx h2nettype
 
+# Wrapper function to print the deprecation warning
 h2tcp() {
-       h2name_or_ip "$1" "tcp"
-}
-declare -fx h2tcp
-
-h2elan() {
-       if [ "$1" = "'*'" ]; then echo \'*\'; else
-               if type __h2elan >/dev/null 2>&1; then
-                       ID=$(__h2elan $1)
-               else
-                       ID=`echo $1 | sed 's/[^0-9]*//g'`
-               fi
-               echo $ID"@elan"
+       echo "h2tcp: deprecated, use h2nettype instead" 1>&2
+       if [[ -n "$NETTYPE" ]]; then
+               h2nettype "$@"
+       else
+               h2nettype "$1" "tcp"
        fi
 }
-declare -fx h2elan
 
+# Wrapper function to print the deprecation warning
 h2o2ib() {
-       h2name_or_ip "$1" "o2ib"
+       echo "h2o2ib: deprecated, use h2nettype instead" 1>&2
+       if [[ -n "$NETTYPE" ]]; then
+               h2nettype "$@"
+       else
+               h2nettype "$1" "o2ib"
+       fi
 }
-declare -fx h2o2ib
 
 # This enables variables in cfg/"setup".sh files to support the pdsh HOSTLIST
 # expressions format. As a bonus we can then just pass in those variables
@@ -2762,6 +3500,8 @@ hostlist_expand() {
                 group=${group%%]*}
 
                 for range in ${group//,/ }; do
+                   local order
+
                     begin=${range%-*}
                     end=${range#*-}
 
@@ -2777,7 +3517,13 @@ hostlist_expand() {
                     begin=$(echo $begin | sed 's/0*//')
                     [ -z $begin ] && begin=0
 
-                    for num in $(seq -f "%0${padlen}g" $begin $end); do
+                   if [ ! -z "${begin##[!0-9]*}" ]; then
+                       order=$(seq -f "%0${padlen}g" $begin $end)
+                   else
+                       order=$(eval echo {$begin..$end});
+                   fi
+
+                   for num in $order; do
                         value="${name#*,}${num}${back}"
                         [ "$value" != "${value/\[/}" ] && {
                             value=$(hostlist_expand "$value")
@@ -2798,8 +3544,10 @@ hostlist_expand() {
     myList="${list%% *}"
 
     while [[ "$list" != ${myList##* } ]]; do
-        list=${list//${list%% *} /}
-        myList="$myList ${list%% *}"
+       local tlist=" $list"
+       list=${tlist// ${list%% *} / }
+       list=${list:1}
+       myList="$myList ${list%% *}"
     done
     myList="${myList%* }";
 
@@ -2951,16 +3699,20 @@ do_node() {
         $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
     fi
 
-    if [ "$myPDSH" = "rsh" ]; then
-# we need this because rsh does not return exit code of an executed command
-        local command_status="$TMP/cs"
-        rsh $HOST ":> $command_status"
-        rsh $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;
-                    cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\") ||
-                    echo command failed >$command_status"
-        [ -n "$($myPDSH $HOST cat $command_status)" ] && return 1 || true
-        return 0
-    fi
+       if [[ "$myPDSH" == "rsh" ]] ||
+          [[ "$myPDSH" == *pdsh* && "$myPDSH" != *-S* ]]; then
+               # we need this because rsh and pdsh do not return
+               # exit code of an executed command
+               local command_status="$TMP/cs"
+               eval $myPDSH $HOST ":> $command_status"
+               eval $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests;
+                                    PATH=\$PATH:/sbin:/usr/sbin;
+                                    cd $RPWD;
+                                    LUSTRE=\"$RLUSTRE\" sh -c \"$@\") ||
+                                    echo command failed >$command_status"
+               [[ -n "$($myPDSH $HOST cat $command_status)" ]] && return 1 ||
+                       return 0
+       fi
 
     if $verbose ; then
         # print HOSTNAME for myPDSH="no_dsh"
@@ -3137,6 +3889,8 @@ ostdevname() {
 
        case $fstype in
                ldiskfs )
+                       local dev=ost${num}_dev
+                       [[ -n ${!dev} ]] && eval DEVPTR=${!dev} ||
                        #if $OSTDEVn isn't defined, default is $OSTDEVBASE + num
                        eval DEVPTR=${!DEVNAME:=${OSTDEVBASE}${num}};;
                zfs )
@@ -3183,6 +3937,8 @@ mdsdevname() {
 
        case $fstype in
                ldiskfs )
+                       local dev=mds${num}_dev
+                       [[ -n ${!dev} ]] && eval DEVPTR=${!dev} ||
                        #if $MDSDEVn isn't defined, default is $MDSDEVBASE{n}
                        eval DEVPTR=${!DEVNAME:=${MDSDEVBASE}${num}};;
                zfs )
@@ -3225,9 +3981,10 @@ mgsdevname() {
        case $fstype in
        ldiskfs )
                if [ $(facet_host mgs) = $(facet_host mds1) ] &&
-                  ( [ -z "$MGSDEV" ] || [ $MGSDEV = $(mdsdevname 1) ] ); then
+                  ( [ -z "$MGSDEV" ] || [ $MGSDEV = $MDSDEV1 ] ); then
                        DEVPTR=$(mdsdevname 1)
                else
+                       [[ -n $mgs_dev ]] && DEVPTR=$mgs_dev ||
                        DEVPTR=$MGSDEV
                fi;;
        zfs )
@@ -3284,11 +4041,16 @@ mount_ldiskfs() {
        local dev=$(facet_device $facet)
        local mnt=${2:-$(facet_mntpt $facet)}
        local opts
+       local dm_dev=$dev
 
-       if ! do_facet $facet test -b $dev; then
-               opts="-o loop"
+       if dm_flakey_supported $facet; then
+               dm_dev=$(dm_create_dev $facet $dev)
+               [[ -n "$dm_dev" ]] || dm_dev=$dev
        fi
-       do_facet $facet mount -t ldiskfs $opts $dev $mnt
+       is_blkdev $facet $dm_dev || opts=$(csa_add "$opts" -o loop)
+       export_dm_dev $facet $dm_dev
+
+       do_facet $facet mount -t ldiskfs $opts $dm_dev $mnt
 }
 
 unmount_ldiskfs() {
@@ -3411,14 +4173,15 @@ cleanup_echo_devs () {
 }
 
 cleanupall() {
-    nfs_client_mode && return
+       nfs_client_mode && return
        cifs_client_mode && return
 
-    stopall $*
-    cleanup_echo_devs
+       cleanup_echo_devs
+       CLEANUP_DM_DEV=true stopall $*
 
-    unload_modules
-    cleanup_gss
+       unload_modules
+       cleanup_sk
+       cleanup_gss
 }
 
 combined_mgs_mds () {
@@ -3461,7 +4224,7 @@ mkfs_opts() {
 
        var=${facet}failover_HOST
        if [ -n "${!var}" ] && [ ${!var} != $(facet_host $facet) ]; then
-               opts+=" --failnode=$(h2$NETTYPE ${!var})"
+               opts+=" --failnode=$(h2nettype ${!var})"
        fi
 
        opts+=${TIMEOUT:+" --param=sys.timeout=$TIMEOUT"}
@@ -3469,15 +4232,15 @@ mkfs_opts() {
 
        if [ $type == MDS ]; then
                opts+=${MDSCAPA:+" --param-mdt.capa=$MDSCAPA"}
-               opts+=${STRIPE_BYTES:+" --param=lov.stripesize=$STRIPE_BYTES"}
-               opts+=${STRIPES_PER_OBJ:+" --param=lov.stripecount=$STRIPES_PER_OBJ"}
+               opts+=${DEF_STRIPE_SIZE:+" --param=lov.stripesize=$DEF_STRIPE_SIZE"}
+               opts+=${DEF_STRIPE_COUNT:+" --param=lov.stripecount=$DEF_STRIPE_COUNT"}
                opts+=${L_GETIDENTITY:+" --param=mdt.identity_upcall=$L_GETIDENTITY"}
 
                if [ $fstype == ldiskfs ]; then
                        # Check for wide striping
                        if [ $OSTCOUNT -gt 160 ]; then
                                MDSJOURNALSIZE=${MDSJOURNALSIZE:-4096}
-                               fs_mkfs_opts+="-O large_xattr"
+                               fs_mkfs_opts+="-O ea_inode"
                        fi
 
                        var=${facet}_JRN
@@ -3526,6 +4289,17 @@ mkfs_opts() {
        echo -n "$opts"
 }
 
+# Print the "--mountfsoptions" argument for a facet.
+#
+# usage: mountfs_opts <facet>
+# The value is read from the variable named <type>_MOUNT_FS_OPTS, where
+# <type> is whatever facet_type prints for the facet.  Nothing is printed
+# when that variable is unset or empty; otherwise the output is
+# " --mountfsoptions=<value>" (leading space, no trailing newline).
+mountfs_opts() {
+       local target=$1
+       local kind=$(facet_type $target)
+       local optvar=${kind}_MOUNT_FS_OPTS
+
+       # ${!optvar:+...} yields the argument only for a non-empty value
+       printf '%s' "${!optvar:+ --mountfsoptions=${!optvar}}"
+}
+
 check_ost_indices() {
        local index_count=${#OST_INDICES[@]}
        [[ $index_count -eq 0 || $OSTCOUNT -le $index_count ]] && return 0
@@ -3544,6 +4318,34 @@ check_ost_indices() {
        done
 }
 
+# Make sure the backing "device" path of a server target exists.
+#
+# usage: __touch_device <mgs|mds|ost> [num]
+# The path comes from <type>devname for ldiskfs facets and from
+# <type>vdevname for zfs facets.  If the path already exists on the facet,
+# nothing is done.  A missing path under /dev (other than /dev/shm) is
+# reported through error(); any other missing path is created with touch.
+__touch_device()
+{
+       local kind=$1 # mgs || mds || ost
+       local index=$2
+       local target=${kind}${index}
+       local fstype=$(facet_fstype $target)
+       local path
+
+       if [[ "$fstype" == "ldiskfs" ]]; then
+               path=$(${kind}devname $index)
+       elif [[ "$fstype" == "zfs" ]]; then
+               path=$(${kind}vdevname $index)
+       else
+               error "Unhandled filesystem type"
+       fi
+
+       # nothing to create when the path is already present on the facet
+       if do_facet $target "[ -e \"$path\" ]"; then
+               return
+       fi
+
+       # Note: the following check only works with absolute paths
+       if [[ "$path" =~ ^/dev/ ]] && [[ ! "$path" =~ ^/dev/shm/ ]]; then
+               error "$target: device '$path' does not exist"
+       fi
+
+       do_facet $target "touch \"${path}\""
+}
+
 format_mgs() {
        local quiet
 
@@ -3552,7 +4354,13 @@ format_mgs() {
        fi
        echo "Format mgs: $(mgsdevname)"
        reformat_external_journal mgs
-       add mgs $(mkfs_opts mgs $(mgsdevname)) --reformat \
+
+       # touch "device" in case it is a loopback file for testing and needs to
+       # be created. mkfs.lustre doesn't do this to avoid accidentally writing
+       # to non-existent files in /dev if the admin made a typo during setup
+       __touch_device mgs
+
+       add mgs $(mkfs_opts mgs $(mgsdevname)) $(mountfs_opts mgs) --reformat \
                $(mgsdevname) $(mgsvdevname) ${quiet:+>/dev/null} || exit 10
 }
 
@@ -3565,9 +4373,12 @@ format_mdt() {
        fi
        echo "Format mds$num: $(mdsdevname $num)"
        reformat_external_journal mds$num
+
+       __touch_device mds $num
+
        add mds$num $(mkfs_opts mds$num $(mdsdevname ${num})) \
-               --reformat $(mdsdevname $num) $(mdsvdevname $num) \
-               ${quiet:+>/dev/null} || exit 10
+               $(mountfs_opts mds$num) --reformat $(mdsdevname $num) \
+               $(mdsvdevname $num) ${quiet:+>/dev/null} || exit 10
 }
 
 format_ost() {
@@ -3578,13 +4389,16 @@ format_ost() {
        fi
        echo "Format ost$num: $(ostdevname $num)"
        reformat_external_journal ost$num
+
+       __touch_device ost $num
+
        add ost$num $(mkfs_opts ost$num $(ostdevname ${num})) \
-               --reformat $(ostdevname $num) $(ostvdevname ${num}) \
-               ${quiet:+>/dev/null} || exit 10
+               $(mountfs_opts ost$num) --reformat $(ostdevname $num) \
+               $(ostvdevname ${num}) ${quiet:+>/dev/null} || exit 10
 }
 
 formatall() {
-       stopall
+       stopall -f
        # Set hostid for ZFS/SPL zpool import protection
        # (Assumes MDS version is also OSS version)
        if [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.8.54) ];
@@ -3612,11 +4426,11 @@ formatall() {
 }
 
 mount_client() {
-    grep " $1 " /proc/mounts || zconf_mount $HOSTNAME $*
+       grep " $1 " /proc/mounts || zconf_mount $HOSTNAME $*
 }
 
 umount_client() {
-    grep " $1 " /proc/mounts && zconf_umount `hostname` $*
+       grep " $1 " /proc/mounts && zconf_umount $HOSTNAME $*
 }
 
 # return value:
@@ -3652,8 +4466,8 @@ switch_identity() {
 
 remount_client()
 {
-        zconf_umount `hostname` $1 || error "umount failed"
-        zconf_mount `hostname` $1 || error "mount failed"
+       zconf_umount $HOSTNAME $1 || error "umount failed"
+       zconf_mount $HOSTNAME $1 || error "mount failed"
 }
 
 writeconf_facet() {
@@ -3683,7 +4497,92 @@ writeconf_all () {
        return $rc
 }
 
+# Start the mgs facet on $(mgsdevname) with $MGS_MOUNT_OPTS.
+# Nothing is started when combined_mgs_mds reports success.
+mountmgs() {
+       combined_mgs_mds && return 0
+
+       start mgs $(mgsdevname) $MGS_MOUNT_OPTS
+}
+
+# Start every MDS facet, mds1 .. mds$MDSCOUNT, on its device.
+#
+# After each facet is started, mds<N>_HOST and mds<N>failover_HOST are set
+# to the facet's host if they are unset or empty.  When $IDENTITY_UPCALL is
+# non-empty and not "default", switch_identity is called for the facet.
+mountmds() {
+       local num
+       local devname
+       local host
+       local varname
+
+       for num in $(seq $MDSCOUNT); do
+               devname=$(mdsdevname $num)
+               start mds$num $devname $MDS_MOUNT_OPTS
+
+               # We started mds$num, now we should set mds${num}_HOST
+               # and mds${num}failover_HOST variables properly if they
+               # are not set.
+               host=$(facet_host mds$num)
+               for varname in mds${num}_HOST mds${num}failover_HOST; do
+                       # assign by name without eval, so the host string
+                       # is never re-parsed as shell code
+                       [[ -n "${!varname}" ]] ||
+                               printf -v "$varname" '%s' "$host"
+               done
+
+               # quoted test: an empty IDENTITY_UPCALL is skipped rather
+               # than making the test itself a syntax error
+               if [[ -n "$IDENTITY_UPCALL" &&
+                     "$IDENTITY_UPCALL" != "default" ]]; then
+                       switch_identity $num $IDENTITY_UPCALL
+               fi
+       done
+}
+
+# Start every OST facet, ost1 .. ost$OSTCOUNT, on its device.
+#
+# After each facet is started, ost<N>_HOST and ost<N>failover_HOST are set
+# to the facet's host if they are unset or empty.
+mountoss() {
+       local num
+       local devname
+       local host
+       local varname
+
+       for num in $(seq $OSTCOUNT); do
+               devname=$(ostdevname $num)
+               start ost$num $devname $OST_MOUNT_OPTS
+
+               # We started ost$num, now we should set ost${num}_HOST
+               # and ost${num}failover_HOST variables properly if they
+               # are not set.
+               host=$(facet_host ost$num)
+               for varname in ost${num}_HOST ost${num}failover_HOST; do
+                       # assign by name without eval, so the host string
+                       # is never re-parsed as shell code
+                       [[ -n "${!varname}" ]] ||
+                               printf -v "$varname" '%s' "$host"
+               done
+       done
+}
+
+# Mount the client file system(s).
+#
+# Starts the lctl debug daemon first when $DAEMONFILE is set.  Returns
+# without mounting anything when $arg1 is "server_only"; arg1 is not a
+# parameter of this function, it is read from the calling scope.
+# Otherwise $MOUNT is mounted locally and on $CLIENTS (when set), followed
+# by clients_up.  When $MOUNT_2 is non-empty, $MOUNT2 is mounted the same
+# way.
+mountcli() {
+       [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
+
+       # quoted comparison: well defined for an empty or multi-word $arg1
+       [[ "$arg1" == "server_only" ]] && return 0
+
+       mount_client $MOUNT
+       if [[ -n "$CLIENTS" ]]; then
+               zconf_mount_clients $CLIENTS $MOUNT
+       fi
+       clients_up
+
+       # NOTE(review): MOUNT_2 is the switch while MOUNT2 is the path that
+       # gets mounted; presumably both come from the test configuration.
+       if [[ -n "$MOUNT_2" ]]; then
+               mount_client $MOUNT2
+               if [[ -n "$CLIENTS" ]]; then
+                       zconf_mount_clients $CLIENTS $MOUNT2
+               fi
+       fi
+}
+
+# Create a nodemap on the MGS node and add the NIDs of the given nodes.
+#
+# usage: sk_nodemap_setup [nodemap_name] [nodes]
+#   nodemap_name - nodemap to create (default: $SK_S2SNM)
+#   nodes        - whitespace-separated nodes; every NID printed by
+#                  "lctl list_nids" on each of them is added to the
+#                  nodemap as a range (default: $HOSTNAME)
+sk_nodemap_setup() {
+       local sk_map_name=${1:-$SK_S2SNM}
+       local sk_map_nodes=${2:-$HOSTNAME}
+       # resolve the MGS node once instead of on every iteration
+       local sk_mgs_node=$(mgs_node)
+       # loop variables are local so they do not leak into the caller
+       local servernode
+       local nid
+
+       do_node $sk_mgs_node "$LCTL nodemap_add $sk_map_name"
+       for servernode in $sk_map_nodes; do
+               local nids=$(do_nodes $servernode "$LCTL list_nids")
+               for nid in $nids; do
+                       do_node $sk_mgs_node "$LCTL nodemap_add_range --name \
+                               $sk_map_name --range $nid"
+               done
+       done
+}
+
 setupall() {
+       local arg1=$1
+
        nfs_client_mode && return
        cifs_client_mode && return
 
@@ -3691,73 +4590,73 @@ setupall() {
 
        load_modules
 
+       init_gss
+
        if [ -z "$CLIENTONLY" ]; then
                echo Setup mgs, mdt, osts
                echo $WRITECONF | grep -q "writeconf" && writeconf_all
-               if ! combined_mgs_mds ; then
-                       start mgs $(mgsdevname) $MGS_MOUNT_OPTS
-               fi
-
-        for num in `seq $MDSCOUNT`; do
-            DEVNAME=$(mdsdevname $num)
-            start mds$num $DEVNAME $MDS_MOUNT_OPTS
-
-            # We started mds, now we should set failover variables properly.
-            # Set mds${num}failover_HOST if it is not set (the default failnode).
-            local varname=mds${num}failover_HOST
-            if [ -z "${!varname}" ]; then
-                eval mds${num}failover_HOST=$(facet_host mds$num)
-            fi
-
-            if [ $IDENTITY_UPCALL != "default" ]; then
-                switch_identity $num $IDENTITY_UPCALL
-            fi
-        done
-        for num in `seq $OSTCOUNT`; do
-            DEVNAME=$(ostdevname $num)
-            start ost$num $DEVNAME $OST_MOUNT_OPTS
-
-            # We started ost$num, now we should set ost${num}failover variable properly.
-            # Set ost${num}failover_HOST if it is not set (the default failnode).
-            varname=ost${num}failover_HOST
-            if [ -z "${!varname}" ]; then
-                eval ost${num}failover_HOST=$(facet_host ost${num})
-            fi
-
-        done
-    fi
-
-    init_gss
-
-    # wait a while to allow sptlrpc configuration be propogated to targets,
-    # only needed when mounting new target devices.
-    if $GSS; then
-        sleep 10
-    fi
 
-    [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
-    mount_client $MOUNT
-    [ -n "$CLIENTS" ] && zconf_mount_clients $CLIENTS $MOUNT
-    clients_up
+               if $SK_MOUNTED; then
+                       echo "Shared Key file system already mounted"
+               else
+                       mountmgs
+                       mountmds
+                       mountoss
+                       if $SHARED_KEY; then
+                               export SK_MOUNTED=true
+                       fi
+               fi
+               if $GSS_SK; then
+                       echo "GSS_SK: setting kernel keyring perms"
+                       do_nodes $(comma_list $(all_nodes)) \
+                               "keyctl show | grep lustre | cut -c1-11 |
+                               sed -e 's/ //g;' |
+                               xargs -IX keyctl setperm X 0x3f3f3f3f"
+
+                       if $SK_S2S; then
+                               # Need to start one nodemap for servers,
+                               # and one for clients.
+                               sk_nodemap_setup $SK_S2SNM \
+                                       $(comma_list $(all_server_nodes))
+                               mountcli
+                               sk_nodemap_setup $SK_S2SNMCLI \
+                                       ${CLIENTS:-$HOSTNAME}
+                               echo "Nodemap set up for SK S2S, remounting."
+                               stopall
+                               mountmgs
+                               mountmds
+                               mountoss
+                       fi
+               fi
+       fi
 
-    if [ "$MOUNT_2" ]; then
-        mount_client $MOUNT2
-        [ -n "$CLIENTS" ] && zconf_mount_clients $CLIENTS $MOUNT2
-    fi
+       # wait a while to allow the sptlrpc configuration to be propagated to
+       # the targets; only needed when mounting new target devices.
+       if $GSS; then
+               sleep 10
+       fi
 
-    init_param_vars
+       mountcli
+       init_param_vars
 
-    # by remounting mdt before ost, initial connect from mdt to ost might
-    # timeout because ost is not ready yet. wait some time to its fully
-    # recovery. initial obd_connect timeout is 5s; in GSS case it's preceeded
-    # by a context negotiation rpc with $TIMEOUT.
-    # FIXME better by monitoring import status.
-    if $GSS; then
-        set_flavor_all $SEC
-        sleep $((TIMEOUT + 5))
-    else
-        sleep 5
-    fi
+       # by remounting mdt before ost, initial connect from mdt to ost might
+       # time out because ost is not ready yet. wait some time for it to
+       # fully recover. initial obd_connect timeout is 5s; in GSS case it's
+       # preceded by a context negotiation rpc with $TIMEOUT.
+       # FIXME better by monitoring import status.
+       if $GSS; then
+               if $GSS_SK; then
+                       set_rule $FSNAME any cli2mdt $SK_FLAVOR
+                       set_rule $FSNAME any cli2ost $SK_FLAVOR
+                       wait_flavor cli2mdt $SK_FLAVOR
+                       wait_flavor cli2ost $SK_FLAVOR
+               else
+                       set_flavor_all $SEC
+               fi
+               sleep $((TIMEOUT + 5))
+       else
+               sleep 5
+       fi
 }
 
 mounted_lustre_filesystems() {
@@ -3983,20 +4882,10 @@ check_config_client () {
         return 0
     fi
 
-    local myMGS_host=$mgs_HOST
-    if [ "$NETTYPE" = "ptl" ]; then
-        myMGS_host=$(h2ptl $mgs_HOST | sed -e s/@ptl//)
-    fi
-
     echo Checking config lustre mounted on $mntpt
     local mgshost=$(mount | grep " $mntpt " | awk -F@ '{print $1}')
     mgshost=$(echo $mgshost | awk -F: '{print $1}')
 
-#    if [ "$mgshost" != "$myMGS_host" ]; then
-#            log "Bad config file: lustre is mounted with mgs $mgshost, but mgs_HOST=$mgs_HOST, NETTYPE=$NETTYPE
-#                   Please use correct config or set mds_HOST correctly!"
-#    fi
-
 }
 
 check_config_clients () {
@@ -4060,7 +4949,7 @@ check_and_setup_lustre() {
        # 1.
        # both MOUNT and MOUNT2 are not mounted
        if ! is_mounted $MOUNT && ! is_mounted $MOUNT2; then
-               [ "$REFORMAT" = "yes" ] && formatall
+               [ "$REFORMAT" = "yes" ] && CLEANUP_DM_DEV=true formatall
                # setupall mounts both MOUNT and MOUNT2 (if MOUNT_2 is set)
                setupall
                is_mounted $MOUNT || error "NAME=$NAME not mounted"
@@ -4125,7 +5014,9 @@ check_and_setup_lustre() {
        fi
 
        init_gss
-       if $GSS; then
+       if $GSS_SK; then
+               set_flavor_all null
+       elif $GSS; then
                set_flavor_all $SEC
        fi
 
@@ -4228,7 +5119,7 @@ run_e2fsck() {
        if [ -n "$(grep "DNE mode isn't supported" $log)" ]; then
                rm -f $log
                if [ $MDSCOUNT -gt 1 ]; then
-                       skip "DNE mode isn't supported!"
+                       skip_noexit "DNE mode isn't supported!"
                        cleanupall
                        exit_status
                else
@@ -4372,7 +5263,7 @@ check_and_cleanup_lustre() {
                cleanup_mount $MOUNT2
        fi
 
-       if [ "$I_MOUNTED" = "yes" ]; then
+       if [[ "$I_MOUNTED" = "yes" ]] && ! $AUSTER_CLEANUP; then
                cleanupall -f || error "cleanup failed"
                unset I_MOUNTED
        fi
@@ -4409,17 +5300,19 @@ wait_for_function () {
 }
 
+# usage: check_network host max [sleep]
+#
+# Wait until ``host`` answers a single ping, then report the interface as UP.
+# Returns 0 immediately when ``host`` equals $HOSTNAME.
+# Calls "exit 1" (terminating the shell, not just the function) when
+# wait_for_function fails.
+#
+# host  - node to ping
+# max   - passed to wait_for_function (printed as the number of seconds
+#         to wait)
+# sleep - passed to wait_for_function after max (default: 5)
 check_network() {
-    local host=$1
-    local max=$2
-    local sleep=${3:-5}
+       local host=$1
+       local max=$2
+       local sleep=${3:-5}
 
-    echo `date +"%H:%M:%S (%s)"` waiting for $host network $max secs ...
-    if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then
-        echo "Network not available!"
-        exit 1
-    fi
+       # nothing to wait for when the target is the local node
+       [ "$host" = "$HOSTNAME" ] && return 0
+
+       echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..."
+       if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then
+               echo "Network not available!"
+               exit 1
+       fi
 
-    echo `date +"%H:%M:%S (%s)"` network interface is UP
+       echo "$(date +'%H:%M:%S (%s)') network interface is UP"
 }
 
 no_dsh() {
@@ -4825,6 +5718,36 @@ report_error() {
 # Test interface
 ##################################
 
+# usage: stack_trap arg sigspec
+#
+# stack_trap() behaves like bash's built-in trap, except that it "stacks" the
+# command ``arg`` on top of previously defined commands for ``sigspec`` instead
+# of overwriting them.
+# stacked traps are executed in reverse order of their registration
+#
+# arg and sigspec have the same meaning as in man (1) trap
+#
+# The new trap string is built as "<arg>; <previous trap commands>" and
+# installed with eval.
+stack_trap()
+{
+       local arg="$1"
+       local sigspec="$2"
+
+       # Use "trap -p" to get the quoting right
+       local old_trap="$(trap -p "$sigspec")"
+       # Append ";" and remove the leading "trap -- '" added by "trap -p"
+       # (old_trap stays empty when no trap was set for sigspec)
+       old_trap="${old_trap:+"; ${old_trap#trap -- \'}"}"
+
+       # Once again, use "trap -p" to get the quoting right
+       # The three commands below run inside the command substitution, i.e.
+       # in a subshell: the trap of the current shell is not modified here,
+       # only the quoted text printed by "trap -p" is captured.
+       local new_trap="$(trap -- "$arg" "$sigspec"
+                         trap -p "$sigspec"
+                         trap -- '' "$sigspec")"
+
+       # Remove the trailing "' $sigspec" part added by "trap -p" and merge
+       #
+       # The resulting string should be safe to "eval" as it is (supposedly
+       # correctly) quoted by "trap -p"
+       #
+       # NOTE(review): the suffix is matched against $sigspec literally, so
+       # this presumably requires callers to spell sigspec the way "trap -p"
+       # prints it (e.g. EXIT) -- confirm.
+       eval "${new_trap%\' $sigspec}${old_trap:-"' $sigspec"}"
+}
+
 error_noexit() {
        report_error "$@"
 }
@@ -4879,7 +5802,7 @@ skip_env () {
        $FAIL_ON_SKIP_ENV && error false $@ || skip $@
 }
 
-skip() {
+skip_noexit() {
        echo
        log " SKIP: $TESTSUITE $TESTNAME $@"
 
@@ -4894,8 +5817,13 @@ skip() {
                echo "$TESTSUITE: SKIP: $TESTNAME $@" >> $TESTSUITELOG || true
 }
 
+# usage: skip message...
+#
+# Report the current test as skipped through skip_noexit, then terminate
+# the running shell with exit status 0.
+skip() {
+       # quote "$@" so the message words are forwarded unchanged; an
+       # unquoted $@ undergoes word splitting and pathname expansion, which
+       # would mangle a message containing characters such as "*" or "?"
+       skip_noexit "$@"
+       exit 0
+}
+
 build_test_filter() {
-    EXCEPT="$EXCEPT $(testslist_filter)"
+       EXCEPT="$EXCEPT $(testslist_filter)"
 
        for O in $ONLY; do
                if [[ $O = [0-9]*-[0-9]* ]]; then
@@ -4969,32 +5897,32 @@ run_test() {
        ALWAYS_SKIPPED="y"
        testname=EXCEPT_$1
        if [ ${!testname}x != x ]; then
-               TESTNAME=test_$1 skip "skipping excluded test $1"
+               TESTNAME=test_$1 skip_noexit "skipping excluded test $1"
                return 0
        fi
        testname=EXCEPT_$base
        if [ ${!testname}x != x ]; then
-               TESTNAME=test_$1 skip "skipping excluded test $1 (base $base)"
+               TESTNAME=test_$1 skip_noexit "skipping excluded test $1 (base $base)"
                return 0
        fi
        testname=EXCEPT_ALWAYS_$1
        if [ ${!testname}x != x ]; then
-               TESTNAME=test_$1 skip "skipping ALWAYS excluded test $1"
+               TESTNAME=test_$1 skip_noexit "skipping ALWAYS excluded test $1"
                return 0
        fi
        testname=EXCEPT_ALWAYS_$base
        if [ ${!testname}x != x ]; then
-               TESTNAME=test_$1 skip "skipping ALWAYS excluded test $1 (base $base)"
+               TESTNAME=test_$1 skip_noexit "skipping ALWAYS excluded test $1 (base $base)"
                return 0
        fi
        testname=EXCEPT_SLOW_$1
        if [ ${!testname}x != x ]; then
-               TESTNAME=test_$1 skip "skipping SLOW test $1"
+               TESTNAME=test_$1 skip_noexit "skipping SLOW test $1"
                return 0
        fi
        testname=EXCEPT_SLOW_$base
        if [ ${!testname}x != x ]; then
-               TESTNAME=test_$1 skip "skipping SLOW test $1 (base $base)"
+               TESTNAME=test_$1 skip_noexit "skipping SLOW test $1 (base $base)"
                return 0
        fi
 
@@ -5203,8 +6131,9 @@ check_grant() {
        export base=$(basetest $1)
        [ "$CHECK_GRANT" == "no" ] && return 0
 
-       testname=GCHECK_ONLY_${base}
-       [ ${!testname}x == x ] && return 0
+       testnamebase=GCHECK_ONLY_${base}
+       testname=GCHECK_ONLY_$1
+       [ ${!testnamebase}x == x -a ${!testname}x == x ] && return 0
 
        echo -n "checking grant......"
 
@@ -5213,6 +6142,7 @@ check_grant() {
 
        # sync all the data and make sure no pending data on server
        do_nodes $clients sync
+       clients_up # initiate all idling connections
 
        # get client grant
        client_grant=$(do_nodes $clients \
@@ -5220,17 +6150,22 @@ check_grant() {
                awk '{ total += $1 } END { printf("%0.0f", total) }')
 
        # get server grant
+       # which is tot_granted less grant_precreate
        server_grant=$(do_nodes $(comma_list $(osts_nodes)) \
-               "$LCTL get_param -n obdfilter.${FSNAME}-OST*.tot_granted" |
-               awk '{ total += $1 } END { printf("%0.0f", total) }')
+               "$LCTL get_param "\
+               "obdfilter.${FSNAME}-OST*.{tot_granted,tot_pending,grant_precreate}" |
+               sed 's/=/ /'| awk '/tot_granted/{ total += $2 };
+                               /tot_pending/{ total -= $2 };
+                               /grant_precreate/{ total -= $2 };
+                               END { printf("%0.0f", total) }')
 
        # check whether client grant == server grant
        if [[ $client_grant -ne $server_grant ]]; then
-               echo "failed: client:${client_grant} server: ${server_grant}."
                do_nodes $(comma_list $(osts_nodes)) \
-                       "$LCTL get_param obdfilter.${FSNAME}-OST*.tot*"
+                       "$LCTL get_param obdfilter.${FSNAME}-OST*.tot*" \
+                       "obdfilter.${FSNAME}-OST*.grant_*"
                do_nodes $clients "$LCTL get_param osc.${FSNAME}-*.cur_*_bytes"
-               return 1
+               error "failed: client:${client_grant} server: ${server_grant}."
        else
                echo "pass: client:${client_grant} server: ${server_grant}"
        fi
@@ -5388,6 +6323,11 @@ facets_nodes () {
        echo -n $nodes_sort
 }
 
+# Get name of the active MGS node.
+# Prints, without a trailing newline, what facets_nodes returns for the
+# facets listed by "get_facets MGS".
+mgs_node () {
+       echo -n $(facets_nodes $(get_facets MGS))
+}
+
 # Get all of the active MDS nodes.
 mdts_nodes () {
        echo -n $(facets_nodes $(get_facets MDS))
@@ -5429,7 +6369,7 @@ remote_nodes_list () {
 all_mdts_nodes () {
        local host
        local failover_host
-       local nodes
+       local nodes="${mds_HOST} ${mdsfailover_HOST}"
        local nodes_sort
        local i
 
@@ -5447,7 +6387,7 @@ all_mdts_nodes () {
 all_osts_nodes () {
        local host
        local failover_host
-       local nodes
+       local nodes="${ost_HOST} ${ostfailover_HOST}"
        local nodes_sort
        local i
 
@@ -5572,13 +6512,19 @@ get_stripe () {
 
 setstripe_nfsserver () {
        local dir=$1
+       local nfsexportdir=$2
+       shift
+       shift
 
-       local nfsserver=$(awk '"'$dir'" ~ $2 && $3 ~ "nfs" && $2 != "/" \
-               { print $1 }' /proc/mounts | cut -f 1 -d : | head -n1)
+       local -a nfsexport=($(awk '"'$dir'" ~ $2 && $3 ~ "nfs" && $2 != "/" \
+               { print $1 }' /proc/mounts | cut -f 1 -d :))
 
-       [ -z $nfsserver ] && echo "$dir is not nfs mounted" && return 1
+       # check that only one nfs mounted
+       [[ -z $nfsexport ]] && echo "$dir is not nfs mounted" && return 1
+       (( ${#nfsexport[@]} == 1 )) ||
+               error "several nfs mounts found for $dir: ${nfsexport[@]} !"
 
-       do_nodev $nfsserver lfs setstripe "$@"
+       do_nodev ${nfsexport[0]} lfs setstripe $nfsexportdir "$@"
 }
 
 # Check and add a test group.
@@ -5748,32 +6694,22 @@ inodes_available () {
 }
 
 mdsrate_inodes_available () {
-    local min_inodes=$(inodes_available)
-    echo $((min_inodes * 99 / 100))
-}
-
-# reset llite stat counters
-clear_llite_stats(){
-        lctl set_param -n llite.*.stats 0
-}
-
-# sum llite stat items
-calc_llite_stats() {
-       local res=$(lctl get_param -n llite.*.stats |
-               awk '/^'"$1"'/ {sum += $2} END { printf("%0.0f", sum) }')
-       echo $((res))
+       local min_inodes=$(inodes_available)
+       echo $((min_inodes * 99 / 100))
 }
 
-# reset osc stat counters
-clear_osc_stats(){
-       lctl set_param -n osc.*.osc_stats 0
+# reset stat counters
+# usage: clear_stats paramfile
+# Writes 0 to the lctl parameter(s) named by ``paramfile``
+# (for instance a pattern such as osc.*.osc_stats).
+clear_stats() {
+       local paramfile="$1"
+       lctl set_param -n $paramfile=0
 }
 
-# sum osc stat items
-calc_osc_stats() {
-       local res=$(lctl get_param -n osc.*.osc_stats |
-               awk '/^'"$1"'/ {sum += $2} END { printf("%0.0f", sum) }')
-       echo $((res))
+# sum stat items
+# usage: calc_stats paramfile stat
+# Reads the lctl parameter(s) named by ``paramfile`` and prints, without a
+# trailing newline, the sum of the second field of every line that starts
+# with ``stat`` (prints 0 when no line matches).
+calc_stats() {
+       local paramfile="$1"
+       local stat="$2"
+       # "$stat" is quoted so that it is spliced into the awk program as one
+       # word, free of word splitting and pathname expansion
+       lctl get_param -n $paramfile |
+               awk '/^'"$stat"'/ { sum += $2 } END { printf("%0.0f", sum) }'
 }
 
 calc_sum () {
@@ -5781,8 +6717,8 @@ calc_sum () {
 }
 
 calc_osc_kbytes () {
-        df $MOUNT > /dev/null
-        $LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum
+       $LFS df $MOUNT > /dev/null
+       $LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum
 }
 
 # save_lustre_params(comma separated facet list, parameter_mask)
@@ -5790,16 +6726,17 @@ calc_osc_kbytes () {
+# For every facet in the comma-separated list $1, read the parameters
+# matching the mask $2 on that facet and print them one per line, each line
+# prefixed with the facet name.
 save_lustre_params() {
        local facets=$1
        local facet
-       local nodes
-       local node
+       local facet_svc
 
        for facet in ${facets//,/ }; do
-               node=$(facet_active_host $facet)
-               [[ *\ $node\ * = " $nodes " ]] && continue
-               nodes="$nodes $node"
-
-               do_node $node "$LCTL get_param $2 |
-                       while read s; do echo $facet \\\$s; done"
+               # The command string below is evaluated on the facet:
+               #  - params: everything "get_param $2" returns
+               #  - param:  the lines of params matching the facet's service
+               #            name, or all of params when the service name is
+               #            empty or nothing matches
+               # $facet and $facet_svc are expanded locally; the escaped
+               # \$ expansions are presumably left for the remote shell --
+               # confirm against do_facet.
+               facet_svc=$(facet_svc $facet)
+               do_facet $facet \
+                       "params=\\\$($LCTL get_param $2);
+                        [[ -z \\\"$facet_svc\\\" ]] && param= ||
+                        param=\\\$(grep $facet_svc <<< \\\"\\\$params\\\");
+                        [[ -z \\\$param ]] && param=\\\"\\\$params\\\";
+                        while read s; do echo $facet \\\$s;
+                        done <<< \\\"\\\$param\\\""
        done
 }
 
@@ -5810,7 +6747,7 @@ restore_lustre_params() {
        local val
 
        while IFS=" =" read facet name val; do
-               do_facet $facet "$LCTL set_param -n $name $val"
+               do_facet $facet "$LCTL set_param -n $name=$val"
        done
 }
 
@@ -5863,7 +6800,7 @@ convert_facet2label() {
 }
 
 get_clientosc_proc_path() {
-       echo "${1}-osc-*"
+       echo "${1}-osc-ffff*"
 }
 
 # If the 2.0 MDS was mounted on 1.8 device, then the OSC and LOV names
@@ -5888,10 +6825,7 @@ get_mdtosc_proc_path() {
        local mdt_label=$(convert_facet2label $mds_facet)
        local mdt_index=$(echo $mdt_label | sed -e 's/^.*-//')
 
-       if [ $(lustre_version_code $mds_facet) -le $(version_code 1.8.0) ] ||
-          mds_on_old_device $mds_facet; then
-               echo "${ost_label}-osc"
-       elif [[ $ost_label = *OST* ]]; then
+       if [[ $ost_label = *OST* ]]; then
                echo "${ost_label}-osc-${mdt_index}"
        else
                echo "${ost_label}-osp-${mdt_index}"
@@ -5921,7 +6855,7 @@ _wait_import_state () {
     local i=0
 
        CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq)
-    while [ "${CONN_STATE}" != "${expected}" ]; do
+    while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do
         if [ "${expected}" == "DISCONN" ]; then
             # for disconn we can check after proc entry is removed
             [ "x${CONN_STATE}" == "x" ] && return 0
@@ -6020,9 +6954,7 @@ _wait_osc_import_state() {
 
        if [[ $facet == client* ]]; then
                # During setup time, the osc might not be setup, it need wait
-               # until list_param can return valid value. And also if there
-               # are mulitple osc entries we should list all of them before
-               # go to wait.
+               # until list_param can return valid value.
                params=$($LCTL list_param $param 2>/dev/null || true)
                while [ -z "$params" ]; do
                        if [ $i -ge $maxtime ]; then
@@ -6046,7 +6978,7 @@ _wait_osc_import_state() {
 
        if ! do_rpc_nodes "$(facet_active_host $facet)" \
                        wait_import_state $expected "$params" $maxtime; then
-               error "import is not in ${expected} state"
+               error "$facet: import is not in $expected state after $maxtime"
                return 1
        fi
 
@@ -6068,6 +7000,10 @@ wait_osc_import_state() {
        fi
 }
 
+# usage: wait_osc_import_ready facet ost_facet
+#
+# Same as wait_osc_import_state, with the expected state fixed to a pattern
+# accepting either FULL or IDLE.
+# NOTE(review): the pattern is written with backslash-escaped parentheses
+# and bar, presumably so that it survives being re-parsed on its way to the
+# state check -- confirm.
+wait_osc_import_ready() {
+       wait_osc_import_state $1 $2 "\(FULL\|IDLE\)"
+}
+
 _wait_mgc_import_state() {
        local facet=$1
        local expected=$2
@@ -6130,7 +7066,7 @@ wait_dne_interconnect() {
 
        if [ $MDSCOUNT -gt 1 ]; then
                for num in $(seq $MDSCOUNT); do
-                       wait_osc_import_state mds mds$num FULL
+                       wait_osc_import_ready mds mds$num
                done
        fi
 }
@@ -6153,7 +7089,7 @@ do_rpc_nodes () {
        local LIBPATH="/usr/lib/lustre/tests:/usr/lib64/lustre/tests:"
        local TESTPATH="$RLUSTRE/tests:"
        local RPATH="PATH=${TESTPATH}${LIBPATH}${PATH}:/sbin:/bin:/usr/sbin:"
-       do_nodesv $list "${RPATH} NAME=${NAME} sh rpc.sh $@ "
+       do_nodesv $list "${RPATH} NAME=${NAME} bash rpc.sh $@ "
 }
 
 wait_clients_import_state () {
@@ -6183,13 +7119,54 @@ wait_clients_import_state () {
                local params=$(expand_list $params $proc_path)
        done
 
-       if ! do_rpc_nodes "$list" wait_import_state_mount $expected $params;
+       if ! do_rpc_nodes "$list" wait_import_state_mount "$expected" $params;
        then
                error "import is not in ${expected} state"
                return 1
        fi
 }
 
+# usage: wait_osp_active facet tgt_name tgt_idx expected
+#
+# Poll the "active" parameter of the device for ``tgt_name`` on every MDS
+# (mds1 .. mds$MDSCOUNT) until it equals ``expected``.
+#
+# facet    - "mds" selects osp.<path>.active and skips the MDS whose number
+#            is tgt_idx + 1; any other value selects osc.<path>.active
+# tgt_name - target handed to get_mdtosc_proc_path to build <path>
+# tgt_idx  - index of the target (presumably zero-based, as it is compared
+#            with the MDS number minus one); only used when facet is "mds"
+# expected - integer value the parameter has to reach
+#
+# Each MDS is polled every 5 seconds for at most 30 seconds. error() is
+# called when the parameter cannot be read or when the timeout expires.
+wait_osp_active() {
+       local facet=$1
+       local tgt_name=$2
+       local tgt_idx=$3
+       local expected=$4
+       local num
+
+       # wait until all MDTs are in the expected state
+       for ((num = 1; num <= $MDSCOUNT; num++)); do
+               local mdtosp=$(get_mdtosc_proc_path mds${num} ${tgt_name})
+               local mproc
+               local result
+               # the time budget is set up once per MDS, outside of the
+               # polling loop, so that the elapsed time really accumulates
+               # and the timeout below can fire
+               local max=30
+               local wait=0
+
+               if [ "$facet" = "mds" ]; then
+                       mproc="osp.$mdtosp.active"
+                       [ $num -eq $((tgt_idx + 1)) ] && continue
+               else
+                       mproc="osc.$mdtosp.active"
+               fi
+
+               echo "check $mproc"
+               while true; do
+                       sleep 5
+                       wait=$((wait + 5))
+
+                       # assigned separately from "local" so that the status
+                       # being tested is the one returned by do_facet
+                       result=$(do_facet mds${num} \
+                               "$LCTL get_param -n $mproc") ||
+                               error "Can't read $mproc"
+                       if [ "$result" -eq "$expected" ]; then
+                               echo -n "target updated after "
+                               echo "$wait sec (got $result)"
+                               break
+                       fi
+                       if [ $wait -ge $max ]; then
+                               error "$tgt_name: wanted $expected got $result"
+                               # leave the loop even if error() returns
+                               return 1
+                       fi
+                       echo "Waiting $((max - wait)) secs for $tgt_name"
+               done
+       done
+}
+
 oos_full() {
        local -a AVAILA
        local -a GRANTA
@@ -6234,6 +7211,7 @@ create_pool() {
        local fsname=${1%%.*}
        local poolname=${1##$fsname.}
 
+       stack_trap "destroy_test_pools $fsname" EXIT
        do_facet mgs lctl pool_new $1
        local RC=$?
        # get param should return err unless pool is created
@@ -6313,22 +7291,22 @@ destroy_pool() {
 }
 
 destroy_pools () {
-    local fsname=${1:-$FSNAME}
-    local poolname
-    local listvar=${fsname}_CREATED_POOLS
+       local fsname=${1:-$FSNAME}
+       local poolname
+       local listvar=${fsname}_CREATED_POOLS
 
-    [ x${!listvar} = x ] && return 0
+       [ x${!listvar} = x ] && return 0
 
-    echo destroy the created pools: ${!listvar}
-    for poolname in ${!listvar//,/ }; do
-        destroy_pool $fsname.$poolname
-    done
+       echo "Destroy the created pools: ${!listvar}"
+       for poolname in ${!listvar//,/ }; do
+               destroy_pool $fsname.$poolname
+       done
 }
 
-cleanup_pools () {
-    local fsname=${1:-$FSNAME}
-    trap 0
-    destroy_pools $fsname
+# usage: destroy_test_pools [fsname]
+# Destroy the pools recorded for ``fsname`` (default: $FSNAME) through
+# destroy_pools; a failure of destroy_pools is ignored.
+destroy_test_pools () {
+       # "trap 0" resets the EXIT trap of the current shell to its default
+       trap 0
+       local fsname=${1:-$FSNAME}
+       destroy_pools $fsname || true
 }
 
 gather_logs () {
@@ -6361,6 +7339,7 @@ gather_logs () {
     do_nodesv $list \
         "$LCTL dk > ${prefix}.debug_log.\\\$(hostname -s).${suffix};
          dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}"
+
     if [ ! -f $LOGDIR/shared ]; then
         do_nodes $list rsync -az "${prefix}.*.${suffix}" $HOSTNAME:$LOGDIR
     fi
@@ -6434,15 +7413,17 @@ recovery_time_min() {
 }
 
+# Print the number of /proc/mounts lines, collected from the client nodes
+# ($CLIENTS, or $HOSTNAME when CLIENTS is empty), that contain "lustre" and
+# the word $MOUNT.
 get_clients_mount_count () {
-    local clients=${CLIENTS:-`hostname`}
+       local clients=${CLIENTS:-$HOSTNAME}
 
-    # we need to take into account the clients mounts and
-    # exclude mds/ost mounts if any;
-    do_nodes $clients cat /proc/mounts | grep lustre | grep $MOUNT | wc -l
+       # we need to take into account the clients mounts and
+       # exclude mds/ost mounts if any;
+       # "grep -w" only accepts $MOUNT as a whole word
+       do_nodes $clients cat /proc/mounts | grep lustre |
+               grep -w $MOUNT | wc -l
 }
 
 # gss functions
 PROC_CLI="srpc_info"
+PROC_CON="srpc_contexts"
 
 combination()
 {
@@ -6465,28 +7446,39 @@ combination()
 }
 
+# usage: calc_connection_cnt dir
+# Print the expected number of connections for direction ``dir``, where dir
+# is one of mdt2mdt, mdt2ost, cli2ost, cli2mdt, all2ost, all2mdt, all2all
+# (the result is read from the local variable named cnt_$dir).
 calc_connection_cnt() {
-    local dir=$1
+       local dir=$1
 
-    # MDT->MDT = 2 * C(M, 2)
-    # MDT->OST = M * O
-    # CLI->OST = C * O
-    # CLI->MDT = C * M
-    comb_m2=$(combination $MDSCOUNT 2)
+       # MDT->MDT = 2 * C(M, 2)
+       # MDT->OST = M * O
+       # CLI->OST = C * O
+       # CLI->MDT = C * M
+       comb_m2=$(combination $MDSCOUNT 2)
 
-    local num_clients=$(get_clients_mount_count)
+       local num_clients=$(get_clients_mount_count)
 
-    local cnt_mdt2mdt=$((comb_m2 * 2))
-    local cnt_mdt2ost=$((MDSCOUNT * OSTCOUNT))
-    local cnt_cli2ost=$((num_clients * OSTCOUNT))
-    local cnt_cli2mdt=$((num_clients * MDSCOUNT))
-    local cnt_all2ost=$((cnt_mdt2ost + cnt_cli2ost))
-    local cnt_all2mdt=$((cnt_mdt2mdt + cnt_cli2mdt))
-    local cnt_all2all=$((cnt_mdt2ost + cnt_mdt2mdt + cnt_cli2ost + cnt_cli2mdt))
+       local cnt_mdt2mdt=$((comb_m2 * 2))
+       local cnt_mdt2ost=$((MDSCOUNT * OSTCOUNT))
+       local cnt_cli2ost=$((num_clients * OSTCOUNT))
+       local cnt_cli2mdt=$((num_clients * MDSCOUNT))
+       # the client counts are doubled when $MOUNT2 is mounted as well
+       if is_mounted $MOUNT2; then
+               cnt_cli2mdt=$((cnt_cli2mdt * 2))
+               cnt_cli2ost=$((cnt_cli2ost * 2))
+       fi
+       # in local mode the computed values are replaced by fixed ones
+       if local_mode; then
+               cnt_mdt2mdt=0
+               cnt_mdt2ost=0
+               cnt_cli2ost=2
+               cnt_cli2mdt=1
+       fi
+       local cnt_all2ost=$((cnt_mdt2ost + cnt_cli2ost))
+       local cnt_all2mdt=$((cnt_mdt2mdt + cnt_cli2mdt))
+       local cnt_all2all=$((cnt_mdt2ost + cnt_mdt2mdt \
+               + cnt_cli2ost + cnt_cli2mdt))
 
-    local var=cnt_$dir
-    local res=${!var}
+       # indirect expansion: pick the cnt_* variable selected by $dir
+       local var=cnt_$dir
+       local res=${!var}
 
-    echo $res
+       echo $res
 }
 
 set_rule()
@@ -6511,6 +7503,13 @@ set_rule()
     do_facet mgs "$LCTL conf_param $cmd"
 }
 
+# usage: count_contexts output
+# Print the number of lines of ``output`` that match "expire.*key.*hdl".
+count_contexts()
+{
+       local output=$1
+       local total_ctx=$(echo "$output" | grep -c "expire.*key.*hdl")
+       echo $total_ctx
+}
+
 count_flvr()
 {
     local output=$1
@@ -6552,12 +7551,22 @@ flvr_cnt_cli2mdt()
     local flavor=$1
     local cnt
 
-    local clients=${CLIENTS:-`hostname`}
+    local clients=${CLIENTS:-$HOSTNAME}
 
     for c in ${clients//,/ }; do
-        output=`do_node $c lctl get_param -n mdc.*-MDT*-mdc-*.$PROC_CLI 2>/dev/null`
-        tmpcnt=`count_flvr "$output" $flavor`
-        cnt=$((cnt + tmpcnt))
+       local output=$(do_node $c lctl get_param -n \
+                mdc.*-*-mdc-*.$PROC_CLI 2>/dev/null)
+       local tmpcnt=$(count_flvr "$output" $flavor)
+       if $GSS_SK && [ $flavor != "null" ]; then
+               # tmpcnt=min(contexts,flavors) to ensure SK context is on
+               output=$(do_node $c lctl get_param -n \
+                        mdc.*-MDT*-mdc-*.$PROC_CON 2>/dev/null)
+               local outcon=$(count_contexts "$output")
+               if [ "$outcon" -lt "$tmpcnt" ]; then
+                       tmpcnt=$outcon
+               fi
+       fi
+       cnt=$((cnt + tmpcnt))
     done
     echo $cnt
 }
@@ -6567,11 +7576,21 @@ flvr_cnt_cli2ost()
     local flavor=$1
     local cnt
 
-    local clients=${CLIENTS:-`hostname`}
+    local clients=${CLIENTS:-$HOSTNAME}
 
     for c in ${clients//,/ }; do
-        output=`do_node $c lctl get_param -n osc.*OST*-osc-[^M][^D][^T]*.$PROC_CLI 2>/dev/null`
-        tmpcnt=`count_flvr "$output" $flavor`
+       local output=$(do_node $c lctl get_param -n \
+                osc.*OST*-osc-[^M][^D][^T]*.$PROC_CLI 2>/dev/null)
+       local tmpcnt=$(count_flvr "$output" $flavor)
+       if $GSS_SK && [ $flavor != "null" ]; then
+               # tmpcnt=min(contexts,flavors) to ensure SK context is on
+               output=$(do_node $c lctl get_param -n \
+                        osc.*OST*-osc-[^M][^D][^T]*.$PROC_CON 2>/dev/null)
+               local outcon=$(count_contexts "$output")
+               if [ "$outcon" -lt "$tmpcnt" ]; then
+                       tmpcnt=$outcon
+               fi
+       fi
         cnt=$((cnt + tmpcnt))
     done
     echo $cnt
@@ -6588,8 +7607,18 @@ flvr_cnt_mdt2mdt()
     fi
 
     for num in `seq $MDSCOUNT`; do
-        output=`do_facet mds$num lctl get_param -n mdc.*-MDT*-mdc[0-9]*.$PROC_CLI 2>/dev/null`
-        tmpcnt=`count_flvr "$output" $flavor`
+       local output=$(do_facet mds$num lctl get_param -n \
+               osp.*-MDT*osp-MDT*.$PROC_CLI 2>/dev/null)
+       local tmpcnt=$(count_flvr "$output" $flavor)
+       if $GSS_SK && [ $flavor != "null" ]; then
+               # tmpcnt=min(contexts,flavors) to ensure SK context is on
+               output=$(do_facet mds$num lctl get_param -n \
+                       osp.*-MDT*osp-MDT*.$PROC_CON 2>/dev/null)
+               local outcon=$(count_contexts "$output")
+               if [ "$outcon" -lt "$tmpcnt" ]; then
+                       tmpcnt=$outcon
+               fi
+       fi
         cnt=$((cnt + tmpcnt))
     done
     echo $cnt;
@@ -6604,9 +7633,18 @@ flvr_cnt_mdt2ost()
     for num in `seq $MDSCOUNT`; do
         mdtosc=$(get_mdtosc_proc_path mds$num)
         mdtosc=${mdtosc/-MDT*/-MDT\*}
-        output=$(do_facet mds$num lctl get_param -n \
-            osc.$mdtosc.$PROC_CLI 2>/dev/null)
-        tmpcnt=`count_flvr "$output" $flavor`
+       local output=$(do_facet mds$num lctl get_param -n \
+                osc.$mdtosc.$PROC_CLI 2>/dev/null)
+       local tmpcnt=$(count_flvr "$output" $flavor)
+       if $GSS_SK && [ $flavor != "null" ]; then
+               # tmpcnt=min(contexts,flavors) to ensure SK context is on
+               output=$(do_facet mds$num lctl get_param -n \
+                        osc.$mdtosc.$PROC_CON 2>/dev/null)
+               local outcon=$(count_contexts "$output")
+               if [ "$outcon" -lt "$tmpcnt" ]; then
+                       tmpcnt=$outcon
+               fi
+       fi
         cnt=$((cnt + tmpcnt))
     done
     echo $cnt;
@@ -6616,7 +7654,8 @@ flvr_cnt_mgc2mgs()
 {
     local flavor=$1
 
-    output=`do_facet client lctl get_param -n mgc.*.$PROC_CLI 2>/dev/null`
+    local output=$(do_facet client lctl get_param -n mgc.*.$PROC_CLI \
+                       2>/dev/null)
     count_flvr "$output" $flavor
 }
 
@@ -6655,75 +7694,109 @@ do_check_flavor()
 
 wait_flavor()
 {
-    local dir=$1        # from to
-    local flavor=$2     # flavor expected
-    local expect=${3:-$(calc_connection_cnt $dir)}     # number expected
-
-    local res=0
-
-    for ((i=0;i<20;i++)); do
-        echo -n "checking $dir..."
-        res=$(do_check_flavor $dir $flavor)
-        echo "found $res/$expect $flavor connections"
-        [ $res -ge $expect ] && return 0
-        sleep 4
-    done
+       local dir=$1        # from to
+       local flavor=$2     # flavor expected
+       local expect=${3:-$(calc_connection_cnt $dir)} # number expected
+       local WAITFLAVOR_MAX=20 # how many retries before abort?
+
+       local res=0
+       for ((i = 0; i < $WAITFLAVOR_MAX; i++)); do
+               echo -n "checking $dir..."
+               res=$(do_check_flavor $dir $flavor)
+               echo "found $res/$expect $flavor connections"
+               [ $res -ge $expect ] && return 0
+               sleep 4
+       done
 
-    echo "Error checking $flavor of $dir: expect $expect, actual $res"
-    return 1
+       echo "Error checking $flavor of $dir: expect $expect, actual $res"
+#      echo "Dumping additional logs for SK debug.."
+       do_nodes $(comma_list $(all_server_nodes)) "keyctl show"
+       if $dump; then
+               gather_logs $(comma_list $(nodes_list))
+       fi
+       return 1
 }
 
 restore_to_default_flavor()
 {
-    local proc="mgs.MGS.live.$FSNAME"
+       local proc="mgs.MGS.live.$FSNAME"
 
-    echo "restoring to default flavor..."
+       echo "restoring to default flavor..."
 
-    nrule=`do_facet mgs lctl get_param -n $proc 2>/dev/null | grep ".srpc.flavor." | wc -l`
+       local nrule=$(do_facet mgs lctl get_param -n $proc 2>/dev/null |
+               grep ".srpc.flavor" | wc -l)
 
-    # remove all existing rules if any
-    if [ $nrule -ne 0 ]; then
-        echo "$nrule existing rules"
-        for rule in `do_facet mgs lctl get_param -n $proc 2>/dev/null | grep ".srpc.flavor."`; do
-            echo "remove rule: $rule"
-            spec=`echo $rule | awk -F = '{print $1}'`
-            do_facet mgs "$LCTL conf_param -d $spec"
-        done
-    fi
+       # remove all existing rules if any
+       if [ $nrule -ne 0 ]; then
+               echo "$nrule existing rules"
+               for rule in $(do_facet mgs lctl get_param -n $proc 2>/dev/null |
+                   grep ".srpc.flavor."); do
+                       echo "remove rule: $rule"
+                       spec=`echo $rule | awk -F = '{print $1}'`
+                       do_facet mgs "$LCTL conf_param -d $spec"
+               done
+       fi
 
-    # verify no rules left
-    nrule=`do_facet mgs lctl get_param -n $proc 2>/dev/null | grep ".srpc.flavor." | wc -l`
-    [ $nrule -ne 0 ] && error "still $nrule rules left"
+       # verify no rules left
+       nrule=$(do_facet mgs lctl get_param -n $proc 2>/dev/null |
+               grep ".srpc.flavor." | wc -l)
+       [ $nrule -ne 0 ] && error "still $nrule rules left"
 
-    # wait for default flavor to be applied
-    # currently default flavor for all connections are 'null'
-    wait_flavor all2all null
-    echo "now at default flavor settings"
+       # wait for default flavor to be applied
+       if $GSS_SK; then
+               if $SK_S2S; then
+                       set_rule $FSNAME any any $SK_FLAVOR
+                       wait_flavor all2all $SK_FLAVOR
+               else
+                       set_rule $FSNAME any cli2mdt $SK_FLAVOR
+                       set_rule $FSNAME any cli2ost $SK_FLAVOR
+                       wait_flavor cli2mdt $SK_FLAVOR
+                       wait_flavor cli2ost $SK_FLAVOR
+               fi
+               echo "GSS_SK now at default flavor: $SK_FLAVOR"
+       else
+               wait_flavor all2all null
+       fi
 }
 
 set_flavor_all()
 {
-    local flavor=${1:-null}
+       local flavor=${1:-null}
 
-    echo "setting all flavor to $flavor"
+       echo "setting all flavor to $flavor"
 
-    # FIXME need parameter to this fn
-    # and remove global vars
-    local cnt_all2all=$(calc_connection_cnt all2all)
+       # FIXME need parameter to this fn
+       # and remove global vars
+       local cnt_all2all=$(calc_connection_cnt all2all)
 
-    local res=$(do_check_flavor all2all $flavor)
-    if [ $res -eq $cnt_all2all ]; then
-        echo "already have total $res $flavor connections"
-        return
-    fi
+       local res=$(do_check_flavor all2all $flavor)
+       if [ $res -eq $cnt_all2all ]; then
+               echo "already have total $res $flavor connections"
+               return
+       fi
 
-    echo "found $res $flavor out of total $cnt_all2all connections"
-    restore_to_default_flavor
+       echo "found $res $flavor out of total $cnt_all2all connections"
+       restore_to_default_flavor
 
-    [[ $flavor = null ]] && return 0
+       [[ $flavor = null ]] && return 0
 
-    set_rule $FSNAME any any $flavor
-    wait_flavor all2all $flavor
+       if $GSS_SK && [ $flavor != "null" ]; then
+               if $SK_S2S; then
+                       set_rule $FSNAME any any $flavor
+                       wait_flavor all2all $flavor
+               else
+                       set_rule $FSNAME any cli2mdt $flavor
+                       set_rule $FSNAME any cli2ost $flavor
+                       set_rule $FSNAME any mdt2ost null
+                       set_rule $FSNAME any mdt2mdt null
+                       wait_flavor cli2mdt $flavor
+                       wait_flavor cli2ost $flavor
+               fi
+               echo "GSS_SK now at flavor: $flavor"
+       else
+               set_rule $FSNAME any any $flavor
+               wait_flavor all2all $flavor
+       fi
 }
 
 
@@ -6943,15 +8016,20 @@ run_sgpdd () {
 
 # returns the canonical name for an ldiskfs device
 ldiskfs_canon() {
-        local dev="$1"
-        local facet="$2"
-
-        do_facet $facet "dv=\\\$(lctl get_param -n $dev);
-if foo=\\\$(lvdisplay -c \\\$dv 2>/dev/null); then
-    echo dm-\\\${foo##*:};
-else
-    echo \\\$(basename \\\$dv);
-fi;"
+       local dev="$1"
+       local facet="$2"
+
+       do_facet $facet "dv=\\\$($LCTL get_param -n $dev);
+                        if foo=\\\$(lvdisplay -c \\\$dv 2>/dev/null); then
+                               echo dm-\\\${foo##*:};
+                        else
+                               name=\\\$(basename \\\$dv);
+                               if [[ \\\$name = *flakey* ]]; then
+                                       name=\\\$(lsblk -o NAME,KNAME |
+                                               awk /\\\$name/'{print \\\$NF}');
+                               fi;
+                               echo \\\$name;
+                        fi;"
 }
 
 is_sanity_benchmark() {
@@ -6966,7 +8044,7 @@ is_sanity_benchmark() {
 }
 
 min_ost_size () {
-    $LCTL get_param -n osc.*.kbytesavail | sort -n | head -n1
+       $LFS df | grep OST | awk '{print $4}' | sort -un | head -1
 }
 
 #
@@ -7018,7 +8096,9 @@ get_block_size() {
        echo -n ${size:-0}
 }
 
-# Check whether the "large_xattr" feature is enabled or not.
+# Check whether the "ea_inode" feature is enabled or not, to allow
+# ldiskfs xattrs over one block in size.  Allow both the historical
+# Lustre feature name (large_xattr) and the upstream name (ea_inode).
 large_xattr_enabled() {
        [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 0
 
@@ -7205,11 +8285,14 @@ test_mkdir() {
                local parent=$(dirname $path)
 
                [ -d $path ] && return 0
-               [ ! -d ${parent} ] && mkdir -p ${parent}
+               if [ ! -d ${parent} ]; then
+                       mkdir -p ${parent} ||
+                               error "mkdir parent '$parent' failed"
+               fi
        fi
 
        if [ $MDSCOUNT -le 1 ]; then
-               mkdir $path
+               mkdir $path || error "mkdir '$path' failed"
        else
                local test_num=$(echo $testnum | sed -e 's/[^0-9]*//g')
                local mdt_index
@@ -7220,20 +8303,27 @@ test_mkdir() {
                        mdt_index=$stripe_index
                fi
                echo "striped dir -i$mdt_index -c$stripe_count $path"
-               $LFS setdirstripe -i$mdt_index -c$stripe_count $path
+               $LFS mkdir -i$mdt_index -c$stripe_count $path ||
+                       error "mkdir -i $mdt_index -c$stripe_count $path failed"
        fi
 }
 
-# find the smallest and not in use file descriptor
+# free_fd: find the smallest and not in use file descriptor [above @last_fd]
+#
+# If called many times, passing @last_fd avoids re-checking FDs at or
+# below @last_fd that the caller already knows are still in use.
+#
+# usage: free_fd [last_fd]
 free_fd()
 {
-        local max_fd=$(ulimit -n)
-        local fd=3
-        while [[ $fd -le $max_fd && -e /proc/self/fd/$fd ]]; do
-                ((++fd))
-        done
-        [ $fd -lt $max_fd ] || error "finding free file descriptor failed"
-        echo $fd
+       local max_fd=$(ulimit -n)
+       local fd=$((${1:-2} + 1))
+
+       while [[ $fd -le $max_fd && -e /proc/self/fd/$fd ]]; do
+               ((++fd))
+       done
+       [ $fd -lt $max_fd ] || error "finding free file descriptor failed"
+       echo $fd
 }
 
 check_mount_and_prep()
@@ -7605,3 +8695,619 @@ killall_process () {
 
        do_nodes $clients "killall $signal $name"
 }
+
+# Run "$LCTL snapshot_create -F $FSNAME" on the mgs facet.
+# All arguments are appended to that command line via $*.
+lsnapshot_create()
+{
+       do_facet mgs "$LCTL snapshot_create -F $FSNAME $*"
+}
+
+# Run "$LCTL snapshot_destroy -F $FSNAME" on the mgs facet.
+# All arguments are appended to that command line via $*.
+lsnapshot_destroy()
+{
+       do_facet mgs "$LCTL snapshot_destroy -F $FSNAME $*"
+}
+
+# Run "$LCTL snapshot_modify -F $FSNAME" on the mgs facet.
+# All arguments are appended to that command line via $*.
+lsnapshot_modify()
+{
+       do_facet mgs "$LCTL snapshot_modify -F $FSNAME $*"
+}
+
+# Run "$LCTL snapshot_list -F $FSNAME" on the mgs facet.
+# All arguments are appended to that command line via $*.
+lsnapshot_list()
+{
+       do_facet mgs "$LCTL snapshot_list -F $FSNAME $*"
+}
+
+# Run "$LCTL snapshot_mount -F $FSNAME" on the mgs facet.
+# All arguments are appended to that command line via $*.
+lsnapshot_mount()
+{
+       do_facet mgs "$LCTL snapshot_mount -F $FSNAME $*"
+}
+
+# Run "$LCTL snapshot_umount -F $FSNAME" on the mgs facet.
+# All arguments are appended to that command line via $*.
+lsnapshot_umount()
+{
+       do_facet mgs "$LCTL snapshot_umount -F $FSNAME $*"
+}
+
+# Print $LSNAPSHOT_LOG from the mgs facet, then hand the message to error().
+#
+# $1 - message passed on to error()
+lss_err()
+{
+       local msg=$1
+
+       do_facet mgs "cat $LSNAPSHOT_LOG"
+       # Quoted so the message reaches error() as a single word, without
+       # word splitting or pathname expansion of its contents.
+       error "$msg"
+}
+
+# Destroy, one per loop iteration, every snapshot listed by lsnapshot_list
+# on a "snapshot_name" line that also contains "lss_".
+# Calls lss_err if a forced destroy fails.
+lss_cleanup()
+{
+       echo "Cleaning test environment ..."
+
+       # Every lsnapshot command takes exclusive lock with others,
+       # so can NOT destroy the snapshot during list with 'xargs'.
+       while true; do
+               # second field of the first matching line is the name
+               local ssname=$(lsnapshot_list | grep snapshot_name |
+                       grep lss_ | awk '{ print $2 }' | head -n 1)
+               # nothing left to destroy
+               [ -z "$ssname" ] && break
+
+               lsnapshot_destroy -n $ssname -f ||
+                       lss_err "Fail to destroy $ssname by force"
+       done
+}
+
+# Append one target line to $LSNAPSHOT_CONF on the mgs facet.
+#
+# $1 - facet whose host, vdevice directory, zpool and local fsname are used
+# $2 - role string placed in the label (lss_gen_conf passes MDT or OST)
+# $3 - target index, written as four hex digits in the label
+lss_gen_conf_one()
+{
+       local facet=$1
+       local role=$2
+       local idx=$3
+
+       local host=$(facet_active_host $facet)
+       local dir=$(dirname $(facet_vdevice $facet))
+       local pool=$(zpool_name $facet)
+       local lfsname=$(zfs_local_fsname $facet)
+       # e.g. <FSNAME>-MDT0000 for role MDT and index 0
+       local label=${FSNAME}-${role}$(printf '%04x' $idx)
+
+       do_facet mgs \
+               "echo '$host - $label zfs:${dir}/${pool}/${lfsname} - -' >> \
+               $LSNAPSHOT_CONF"
+}
+
+# Regenerate $LSNAPSHOT_CONF on the mgs facet.
+#
+# The old file is removed, then one line is written for the MGS (only when
+# combined_mgs_mds is false), for each of the $MDSCOUNT MDTs and for each
+# of the $OSTCOUNT OSTs.  skip() is called as soon as a facet whose fstype
+# is not zfs is found.  The resulting file is printed at the end.
+lss_gen_conf()
+{
+       do_facet mgs "rm -f $LSNAPSHOT_CONF"
+       echo "Generating $LSNAPSHOT_CONF on MGS ..."
+
+       # a separate MGS needs its own line with the fixed label "MGS"
+       if ! combined_mgs_mds ; then
+               [ $(facet_fstype mgs) != zfs ] &&
+                       skip "Lustre snapshot 1 only works for ZFS backend"
+
+               local host=$(facet_active_host mgs)
+               local dir=$(dirname $(facet_vdevice mgs))
+               local pool=$(zpool_name mgs)
+               local lfsname=$(zfs_local_fsname mgs)
+
+               do_facet mgs \
+                       "echo '$host - MGS zfs:${dir}/${pool}/${lfsname} - -' \
+                       >> $LSNAPSHOT_CONF" || lss_err "generate lss conf (mgs)"
+       fi
+
+       # facet numbers start at 1, target indexes at 0
+       for num in `seq $MDSCOUNT`; do
+               [ $(facet_fstype mds$num) != zfs ] &&
+                       skip "Lustre snapshot 1 only works for ZFS backend"
+
+               lss_gen_conf_one mds$num MDT $((num - 1)) ||
+                       lss_err "generate lss conf (mds$num)"
+       done
+
+       for num in `seq $OSTCOUNT`; do
+               [ $(facet_fstype ost$num) != zfs ] &&
+                       skip "Lustre snapshot 1 only works for ZFS backend"
+
+               lss_gen_conf_one ost$num OST $((num - 1)) ||
+                       lss_err "generate lss conf (ost$num)"
+       done
+
+       do_facet mgs "cat $LSNAPSHOT_CONF"
+}
+
+# Parse 'lfs getstripe -d <path_with_dir_name>' for non-composite dir
+#
+# $1 - one output line; it is split on whitespace into words and the
+#      words are examined by position
+# Prints the matching option string built from -c, -S, -L and -i.
+parse_plain_dir_param()
+{
+       local invalues=($1)
+       local param=""
+
+       # the quoted right-hand sides make =~ a literal substring match
+       if [[ ${invalues[0]} =~ "stripe_count:" ]]; then
+               param="-c ${invalues[1]}"
+       fi
+       if [[ ${invalues[2]} =~ "stripe_size:" ]]; then
+               param="$param -S ${invalues[3]}"
+       fi
+       if [[ ${invalues[4]} =~ "pattern:" ]]; then
+               # "pattern:" directly followed by "stripe_offset:" means
+               # no pattern value is present, so only -i is emitted
+               if [[ ${invalues[5]} =~ "stripe_offset:" ]]; then
+                       param="$param -i ${invalues[6]}"
+               else
+                       param="$param -L ${invalues[5]} -i ${invalues[7]}"
+               fi
+       elif [[ ${invalues[4]} =~ "stripe_offset:" ]]; then
+               param="$param -i ${invalues[5]}"
+       fi
+       echo "$param"
+}
+
+# Translate one "lmm_*: value" line into the matching option.
+#
+# $1 - the line; its second whitespace-separated field is the value
+# Prints "-c", "-S", "-i" or "-L" followed by the value, or nothing when
+# the line starts with none of the four recognized keys.
+parse_plain_param()
+{
+       local line=$1
+       local val=$(awk '{print $2}' <<< $line)
+
+       if [[ $line =~ ^"lmm_stripe_count:" ]]; then
+               echo "-c $val"
+       elif [[ $line =~ ^"lmm_stripe_size:" ]]; then
+               echo "-S $val"
+       elif [[ $line =~ ^"lmm_stripe_offset:" ]]; then
+               echo "-i $val"
+       elif [[ $line =~ ^"lmm_pattern:" ]]; then
+               echo "-L $val"
+       fi
+}
+
+# Read layout lines from stdin and print one option string for them.
+#
+# The first recognized line selects the mode for all following lines:
+#   "stripe_count:"     -> plain_dir
+#   "lmm_stripe_count:" -> plain_file
+#   "lcm_layout_gen:"   -> pfl
+# Empty lines are ignored.
+parse_layout_param()
+{
+       local mode=""
+       local val=""
+       local param=""
+
+       while read line; do
+               if [[ ! -z $line ]]; then
+                       # the mode is chosen once and then kept
+                       if [[ -z $mode ]]; then
+                               if [[ $line =~ ^"stripe_count:" ]]; then
+                                       mode="plain_dir"
+                               elif [[ $line =~ ^"lmm_stripe_count:" ]]; then
+                                       mode="plain_file"
+                               elif [[ $line =~ ^"lcm_layout_gen:" ]]; then
+                                       mode="pfl"
+                               fi
+                       fi
+
+                       if [[ $mode = "plain_dir" ]]; then
+                               # replaces (does not append to) param
+                               param=$(parse_plain_dir_param "$line")
+                       elif [[ $mode = "plain_file" ]]; then
+                               val=$(parse_plain_param "$line")
+                               [[ ! -z $val ]] && param="$param $val"
+                       elif [[ $mode = "pfl" ]]; then
+                               val=$(echo $line | awk '{print $2}')
+                               if [[ $line =~ ^"lcme_extent.e_end:" ]]; then
+                                       # an EOF extent end is emitted as -1
+                                       if [[ $val = "EOF" ]]; then
+                                               param="$param -E -1"
+                                       else
+                                               param="$param -E $val"
+                                       fi
+                               elif [[ $line =~ ^"stripe_count:" ]]; then
+                                       # pfl dir
+                                       val=$(parse_plain_dir_param "$line")
+                                       param="$param $val"
+                               else
+                                       #pfl file
+                                       val=$(parse_plain_param "$line")
+                                       [[ ! -z $val ]] && param="$param $val"
+                               fi
+                       fi
+               fi
+       done
+       echo "$param"
+}
+
+# Print the option string that parse_layout_param derives from the output
+# of "$LFS getstripe -d $1".
+get_layout_param()
+{
+       local param=$($LFS getstripe -d $1 | parse_layout_param)
+       echo "$param"
+}
+
+# Read every given file while lfsck_verify_pfid is set to 1 on the OSTs.
+#
+# "$@" - files to read
+# Returns 0 when every file could be read, otherwise the exit status of
+# the first failing cat (later files are not tried).  lfsck_verify_pfid is
+# set back to 0 in both cases.
+lfsck_verify_pfid()
+{
+       local f
+       local rc=0
+
+       # Cancel locks before setting lfsck_verify_pfid so that errors are more
+       # controllable
+       cancel_lru_locks mdc
+       cancel_lru_locks osc
+
+       # make sure PFID is set correctly for files
+       do_nodes $(comma_list $(osts_nodes)) \
+              "$LCTL set_param -n obdfilter.${FSNAME}-OST*.lfsck_verify_pfid=1"
+
+       for f in "$@"; do
+               # only the read status matters, so the data is discarded
+               cat "$f" &> /dev/null ||
+                       { rc=$?; echo "verify $f failed"; break; }
+       done
+
+       do_nodes $(comma_list $(osts_nodes)) \
+              "$LCTL set_param -n obdfilter.${FSNAME}-OST*.lfsck_verify_pfid=0"
+       return $rc
+}
+
+# check that clients "oscs" was evicted after "before"
+#
+# $1    - "before" value; an eviction counts only when its value is larger
+# $2... - osc names, each used as osc.<name>.state
+# Calls error() unless every listed osc shows such an eviction.
+check_clients_evicted() {
+       local before=$1
+       shift
+       local oscs=${@}
+       local osc
+       local evicted
+       local rc=0
+
+       for osc in $oscs; do
+               # count this osc as failed until an eviction is found
+               ((rc++))
+               echo "Check state for $osc"
+               # assigned separately from "local" so that $? below is the
+               # status of the pipeline and not that of "local"
+               evicted=$(do_facet client $LCTL get_param osc.$osc.state |
+                       tail -n 3 | awk -F"[ [,]" \
+                       '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+               # awk prints an empty line when no EVICTED entry matched;
+               # default to 0 to keep the arithmetic test well-formed
+               if (($? == 0)) && ((${evicted:-0} > $before)); then
+                       echo "$osc is evicted at $evicted"
+                       ((rc--))
+               fi
+       done
+
+       [ $rc -eq 0 ] || error "client not evicted from OST"
+}
+
+# check that clients OSCS current_state is FULL
+#
+# $1    - timeout handed to wait_update_facet
+# $2... - osc names, each used as osc.<name>.state
+# Calls error() for an osc whose wait_update_facet call fails.
+check_clients_full() {
+       local timeout=$1
+       shift
+       local oscs=${@}
+
+       for osc in $oscs; do
+               wait_update_facet client \
+                       "lctl get_param -n osc.$osc.state |
+                       grep 'current_state: FULL'" \
+                       "current_state: FULL" $timeout
+               [ $? -eq 0 ] || error "$osc state is not FULL"
+       done
+}
+
+#Changelogs
+# Clear the changelog for one user on one MDT, then deregister that user.
+#
+# $1 - facet
+# $2 - changelog user id; may be empty
+# Returns 0 without doing anything when no user is given or the user is
+# not listed by changelog_users.  Failures of the clear and deregister
+# steps are reported through error_noexit with the failing exit status.
+__changelog_deregister() {
+       local facet=$1
+       local mdt="$(facet_svc $facet)"
+       local cl_user=$2
+
+       # skip cleanup if no user registered for this MDT
+       [ -z "$cl_user" ] && echo "$mdt: no changelog user" && return 0
+       # user is no longer registered, skip cleanup
+       changelog_users "$facet" | grep -q "$cl_user" ||
+               { echo "$mdt: changelog user '$cl_user' not found"; return 0; }
+
+       # From this point, if any operation fails, it is an error
+       # ($? in each message is the status of the command that just failed)
+       __changelog_clear $facet $cl_user 0 ||
+               error_noexit "$mdt: changelog_clear $cl_user 0 fail: $?"
+       do_facet $facet $LCTL --device $mdt changelog_deregister $cl_user ||
+               error_noexit "$mdt: changelog_deregister '$cl_user' fail: $?"
+}
+
+# CL_USERS maps a facet name to a space-separated list of the changelog
+# user ids registered on it by changelog_register.
+declare -Ax CL_USERS
+# Register one changelog user on each of the $MDSCOUNT MDTs.
+#
+# For every MDT the current changelog_mask is read, "+hsm" is added to it
+# and a new user is registered.  EXIT handlers are stacked with stack_trap
+# to set the mask back, deregister the user and reset the facet's CL_USERS
+# entry to its previous value.
+changelog_register() {
+       for M in $(seq $MDSCOUNT); do
+               local facet=mds$M
+               local mdt="$(facet_svc $facet)"
+               local cl_mask
+
+               # remember the current mask so it can be put back at exit
+               cl_mask=$(do_facet $facet $LCTL get_param \
+                            mdd.${mdt}.changelog_mask -n)
+               stack_trap "do_facet $facet $LCTL \
+                       set_param mdd.$mdt.changelog_mask=\'$cl_mask\' -n" EXIT
+               do_facet $facet $LCTL set_param mdd.$mdt.changelog_mask=+hsm ||
+                       error "$mdt: changelog_mask=+hsm failed: $?"
+
+               # declared before the assignment so that "||" tests the
+               # command substitution rather than "local"
+               local cl_user
+               cl_user=$(do_facet $facet \
+                                 $LCTL --device $mdt changelog_register -n) ||
+                       error "$mdt: register changelog user failed: $?"
+               stack_trap "__changelog_deregister $facet $cl_user" EXIT
+
+               # the trap text captures the entry's value from before the
+               # append below
+               stack_trap "CL_USERS[$facet]='${CL_USERS[$facet]}'" EXIT
+               # Bash does not support nested arrays, but the format of a
+               # cl_user is constrained enough to use whitespaces as separators
+               CL_USERS[$facet]+="$cl_user "
+       done
+       echo "Registered $MDSCOUNT changelog users: '${CL_USERS[@]% }'"
+}
+
+# Deregister every changelog user recorded in CL_USERS, facet by facet in
+# sorted facet order, and drop each facet's entry once its users are done.
+# Returns the status of the first __changelog_deregister call that fails.
+changelog_deregister() {
+       local cl_user
+       # bash assoc arrays do not guarantee to list keys in created order
+       # so reorder to get same order than in changelog_register()
+       local cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort |
+                         tr "\n" " ")
+
+       for facet in $cl_facets; do
+               for cl_user in ${CL_USERS[$facet]}; do
+                       __changelog_deregister $facet $cl_user || return $?
+               done
+               unset CL_USERS[$facet]
+       done
+}
+
+# Print the value of mdd.<service>.changelog_users read on the given facet.
+#
+# $1 - facet; its service name comes from facet_svc
+changelog_users() {
+       local facet=$1
+       local service=$(facet_svc $facet)
+
+       do_facet $facet $LCTL get_param -n mdd.$service.changelog_users
+}
+
+# Print the second field of the changelog_users line whose first field is
+# exactly the given user id.
+#
+# $1 - facet
+# $2 - changelog user id
+changelog_user_rec() {
+       local facet=$1
+       local cl_user=$2
+       # NOTE(review): "service" is assigned but not used in this function
+       local service=$(facet_svc $facet)
+
+       changelog_users $facet | awk '$1 == "'$cl_user'" { print $2 }'
+}
+
+# Set mdd.*.changelog_mask to the given value on the nodes returned by
+# mdts_nodes.
+#
+# $1 - mask value
+changelog_chmask() {
+       local mask=$1
+
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param mdd.*.changelog_mask="$mask"
+}
+
+# usage: __changelog_clear FACET CL_USER [+]INDEX
+#
+# Runs "$LFS changelog_clear" for CL_USER on the MDT of FACET.  An INDEX
+# with a leading '+' is added to the value printed by changelog_user_rec
+# for that user; any other INDEX is used as given.
+__changelog_clear()
+{
+       local facet=$1
+       local mdt="$(facet_svc $facet)"
+       local cl_user=$2
+       # integer attribute: the "+=" below is an arithmetic addition
+       local -i rec
+
+       case "$3" in
+       +*)
+               # Remove the leading '+'
+               rec=${3:1}
+               rec+=$(changelog_user_rec $facet $cl_user)
+               ;;
+       *)
+               rec=$3
+               ;;
+       esac
+
+       if [ $rec -eq 0 ]; then
+               echo "$mdt: clear the changelog for $cl_user of all records"
+       else
+               echo "$mdt: clear the changelog for $cl_user to record #$rec"
+       fi
+       $LFS changelog_clear $mdt $cl_user $rec
+}
+
+# usage: changelog_clear [+]INDEX
+#
+# If INDEX is prefixed with '+', increment every changelog user's record index
+# by INDEX. Otherwise, clear the changelog up to INDEX for every changelog
+# users.
+#
+# All users recorded in CL_USERS are processed even after a failure; the
+# return value is the status of the first failing __changelog_clear, or 0.
+changelog_clear() {
+       local rc
+       # bash assoc arrays do not guarantee to list keys in created order
+       # so reorder to get same order than in changelog_register()
+       local cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort |
+                         tr "\n" " ")
+
+       for facet in $cl_facets; do
+               for cl_user in ${CL_USERS[$facet]}; do
+                       # ${rc:-$?} keeps an rc that was already recorded
+                       __changelog_clear $facet $cl_user $1 || rc=${rc:-$?}
+               done
+       done
+
+       return ${rc:-0}
+}
+
+# Print the "$LFS changelog" output of each of the $MDSCOUNT MDTs, with
+# every line prefixed by "<mdt service name>.".
+changelog_dump() {
+       for M in $(seq $MDSCOUNT); do
+               local facet=mds$M
+               local mdt="$(facet_svc $facet)"
+
+               $LFS changelog $mdt | sed -e 's/^/'$mdt'./'
+       done
+}
+
+# Print a bracketed field from the last matching changelog record.
+#
+# $1 - record type; $2 - text the record must end with (both are used
+#      inside the gawk regex "/$1.*$2$/")
+# $3 - text that directly precedes the wanted "[...]" value
+# Only the result for the last matching record is printed (tail -1).
+changelog_extract_field() {
+       local cltype=$1
+       local file=$2
+       local identifier=$3
+
+       changelog_dump | gawk "/$cltype.*$file$/ {
+               print gensub(/^.* "$identifier'(\[[^\]]*\]).*$/,"\\1",1)}' |
+               tail -1
+}
+
+# Prints a changelog record produced by "lfs changelog" as an associative array
+#
+# Example:
+# $> changelog2array 16 01CREAT 10:28:46.968438800 2018.03.09 0x0 \
+#                    t=[0x200000401:0x10:0x0] j=touch.501 ef=0xf u=501:501 \
+#                    nid=0@lo p=[0x200000007:0x1:0x0] blob
+# ([index]='16' [type]='CREAT' [time]='10:28:46.968438800'
+#  [date]='2018.03.09' [flags]='0x0' ['target-fid']='0x200000401:0x10:0x0'
+#  ['jobid']='touch.501' ['extra-flags']='0xf' [uid]='501' ['gid']='501'
+#  ['nid']='0@lo' ['parent-fid']='0x200000007:0x1:0x0')
+#
+# Note that the changelog record is not quoted
+# Also note that the line breaks in the output were only added for readability
+# The surrounding '[' and ']' of the t= and p= values are removed, and
+# arguments that are not of the form key=value (like "blob") are dropped.
+#
+# Typically, you want to eval the output of the command to fill an actual
+# associative array, like this:
+# $> eval declare -A changelog=$(changelog2array $entry)
+#
+# It can then be accessed like any bash associative array:
+# $> echo "${changelog[index]}" "${changelog[type]}" "${changelog[flags]}"
+# 16 CREAT 0x0
+# $> echo "${changelog[uid]}":"${changelog[gid]}"
+# 501:501
+#
+changelog2array()
+{
+       # Start the array
+       printf '('
+
+       # A changelog, as printed by "lfs changelog" typically looks like this:
+       # <index> <type> <time> <date> <flags> <key1=value1> <key2=value2> ...
+
+       # Parse the positional part of the changelog
+
+       # changelog_dump() prefixes records with their mdt's name
+       local index="${1##*.}"
+
+       # "${2:2}" drops the first two characters of the type field
+       printf "[index]='%s' [type]='%s' [time]='%s' [date]='%s' [flags]='%s'" \
+              "$index" "${2:2}" "$3" "$4" "$5"
+
+       # Parse the key/value part of the changelog
+       # (the walk starts at the 5th argument, the flags field, which is
+       # skipped by the syntax check below unless it looks like key=value)
+       for arg in "${@:5}"; do
+               # Check it matches a key=value syntax
+               [[ "$arg" =~ ^[[:alpha:]]+= ]] || continue
+
+               local key="${arg%%=*}"
+               local value="${arg#*=}"
+
+               case "$key" in
+               u)
+                       # u is actually for uid AND gid: u=UID:GID
+                       printf " [uid]='%s'" "${value%:*}"
+                       key=gid
+                       value="${value#*:}"
+                       ;;
+               t)
+                       key=target-fid
+                       value="${value#[}"
+                       value="${value%]}"
+                       ;;
+               j)
+                       key=jobid
+                       ;;
+               p)
+                       key=parent-fid
+                       value="${value#[}"
+                       value="${value%]}"
+                       ;;
+               ef)
+                       key=extra-flags
+                       ;;
+               m)
+                       key=mode
+                       ;;
+               x)
+                       key=xattr
+                       ;;
+               *)
+                       # any other key is kept under its own name
+                       ;;
+               esac
+
+               printf " ['%s']='%s'" "$key" "$value"
+       done
+
+       # end the array
+       printf ')'
+}
+
+# Format and print a changelog record
+#
+# Interpreted sequences are:
+#      %%      a single %
+#      %f      the "flags" attribute of a changelog record
+#
+# $1 - format string
+# The record is read from the associative array "changelog", which must be
+# visible in the caller's scope.  A '%' followed by any other character
+# prints nothing for that pair.  A newline is printed after the result.
+__changelog_printf()
+{
+       local format="$1"
+
+       local -i i
+       for ((i = 0; i < ${#format}; i++)); do
+               local char="${format:$i:1}"
+               if [ "$char" != % ]; then
+                       printf '%c' "$char"
+                       continue
+               fi
+
+               # step over the '%' to the conversion character
+               i+=1
+               char="${format:$i:1}"
+               case "$char" in
+               f)
+                       printf '%s' "${changelog[flags]}"
+                       ;;
+               %)
+                       # a lone '%' is not a valid printf format; '%%' is
+                       # the format that prints one literal '%'
+                       printf '%%'
+                       ;;
+               esac
+       done
+       printf '\n'
+}
+
+# Filter changelog records
+#
+# Arguments:
+#      -print          print matching records as they are (the default)
+#      -printf FORMAT  print matching records through __changelog_printf
+#      -KEY VALUE      keep only records whose KEY field, as named by
+#                      changelog2array, equals VALUE
+# Arguments that do not start with '-' are ignored.
+# Returns 0 if at least one record matched, non-zero otherwise.
+changelog_find()
+{
+       local -A filter
+       local action='print'
+       local format
+
+       while [ $# -gt 0 ]; do
+               case "$1" in
+               -print)
+                       action='print'
+                       ;;
+               -printf)
+                       action='printf'
+                       format="$2"
+                       shift
+                       ;;
+               -*)
+                       # any other option is a filter: -KEY VALUE
+                       filter[${1#-}]="$2"
+                       shift
+                       ;;
+               esac
+               shift
+       done
+
+       local found=false
+       local record
+       # the loop and the final "$found" share one { } group so that the
+       # value set inside the piped loop is the one that gets evaluated
+       changelog_dump | { while read -r record; do
+               eval local -A changelog=$(changelog2array $record)
+               for key in "${!filter[@]}"; do
+                       case "$key" in
+                       *)
+                               [ "${changelog[$key]}" == "${filter[$key]}" ]
+                               ;;
+                       esac || continue 2
+               done
+
+               found=true
+
+               case "${action:-print}" in
+               print)
+                       printf '%s\n' "$record"
+                       ;;
+               printf)
+                       __changelog_printf "$format"
+                       ;;
+               esac
+       done; $found; }
+}
+
+# Put a saved layout back on a directory.
+#
+# $1 - directory; nothing is done when it is not a directory
+# $2 - value for the trusted.lov xattr, as printed by save_layout; when
+#      empty, "$LFS setstripe -d" is run on the directory instead
+restore_layout() {
+       local dir=$1
+       local layout=$2
+
+       [ ! -d "$dir" ] && return
+
+       [ -z "$layout" ] && {
+               $LFS setstripe -d $dir || error "error deleting stripe '$dir'"
+               return
+       }
+
+       setfattr -n trusted.lov -v $layout $dir ||
+               error "error restoring layout '$layout' to '$dir'"
+}
+
+# save the layout of a directory, the returned string will be used by
+# restore_layout() to restore the layout
+#
+# $1 - directory
+# Prints the hex-encoded trusted.lov value reported by getfattr; getfattr
+# errors are discarded, so the output is empty when nothing is reported.
+save_layout() {
+       local dir=$1
+       local str=$(getfattr -n trusted.lov --absolute-names -e hex $dir \
+                   2> /dev/null | awk -F'=' '/trusted.lov/{ print $2 }')
+       echo "$str"
+}
+
+# save layout of a directory and restore it at exit
+#
+# $1 - directory
+# The layout is captured now with save_layout and the restore_layout call
+# is registered through stack_trap for EXIT.
+save_layout_restore_at_exit() {
+       local dir=$1
+       local layout=$(save_layout $dir)
+
+       stack_trap "restore_layout $dir $layout" EXIT
+}