export GSS=${GSS:-false}
export GSS_SK=${GSS_SK:-false}
export GSS_KRB5=false
-export GSS_PIPEFS=false
export SHARED_KEY=${SHARED_KEY:-false}
export SK_PATH=${SK_PATH:-/tmp/test-framework-keys}
export SK_OM_PATH=$SK_PATH'/tmp-request-mount'
export SK_S2SNM=${SK_S2SNM:-TestFrameNM}
export SK_S2SNMCLI=${SK_S2SNMCLI:-TestFrameNMCli}
export SK_SKIPFIRST=${SK_SKIPFIRST:-true}
-export IDENTITY_UPCALL=default
+# whether identity upcall is enabled (true), disabled (false), or default
+export IDENTITY_UPCALL=${IDENTITY_UPCALL:-default}
export QUOTA_AUTO=1
export FLAKEY=${FLAKEY:-true}
# specify environment variable containing batch job name for server statistics
export LSNAPSHOT_CONF="/etc/ldev.conf"
export LSNAPSHOT_LOG="/var/log/lsnapshot.log"
+export DATA_SEQ_MAX_WIDTH=0x1ffffff
+
# sles12 umount has a issue with -d option
[ -e /etc/SuSE-release ] && grep -w VERSION /etc/SuSE-release | grep -wq 12 && {
export UMOUNT="umount"
EXCEPT_LIST_FILE=${EXCEPT_LIST_FILE:-${LUSTRE_TESTS_CFG_DIR}/tests-to-skip.sh}
if [ -f "$EXCEPT_LIST_FILE" ]; then
- echo "Reading test skip list from $EXCEPT_LIST_FILE"
- cat $EXCEPT_LIST_FILE
- . $EXCEPT_LIST_FILE
+ echo "Reading test skip list from $EXCEPT_LIST_FILE"
+ cat $EXCEPT_LIST_FILE
+ . $EXCEPT_LIST_FILE
fi
# check config files for options in decreasing order of preference
for i in DIR DIR1 DIR2 MOUNT MOUNT1 MOUNT2
do
local path=${!i}
+
if [ -d "$path" ]; then
eval export $i=$(echo $path | sed -r 's/\/+$//g')
fi
}
usage() {
- echo "usage: $0 [-r] [-f cfgfile]"
- echo " -r: reformat"
+ echo "usage: $0 [-r] [-f cfgfile]"
+ echo " -r: reformat"
- exit
+ exit
}
print_summary () {
printf "$form" "status" "script" "Total(sec)" "E(xcluded) S(low)"
echo "---------------------------------------------------------------"
- for O in $DEFAULT_SUITES; do
- O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]")
- [ "${!O}" = "no" ] && continue || true
- local o=$(echo $O | tr "[:upper:]_" "[:lower:]-")
- local log=${TMP}/${o}.log
- if is_sanity_benchmark $o; then
- log=${TMP}/sanity-benchmark.log
- fi
- local slow=
- local skipped=
- local total=
- local status=Unfinished
- if [ -f $log ]; then
- skipped=$(grep excluded $log | awk '{ printf " %s", $3 }' |
- sed 's/test_//g')
- slow=$(egrep "^PASS|^FAIL" $log | tr -d "("| sed s/s\)$//g |
- sort -nr -k 3 | head -n5 | awk '{ print $2":"$3"s" }')
- total=$(grep duration $log | awk '{ print $2 }')
- if [ "${!O}" = "done" ]; then
- status=Done
- fi
- if $DDETAILS; then
- local durations=$(egrep "^PASS|^FAIL" $log |
- tr -d "("| sed s/s\)$//g |
- awk '{ print $2":"$3"|" }')
- details=$(printf "%s\n%s %s %s\n" "$details" \
- "DDETAILS" "$O" "$(echo $durations)")
- fi
- fi
- printf "$form" $status "$O" "${total}" "E=$skipped"
- printf "$form" "-" "-" "-" "S=$(echo $slow)"
- done
-
- for O in $DEFAULT_SUITES; do
- O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]")
- if [ "${!O}" = "no" ]; then
- printf "$form" "Skipped" "$O" ""
- fi
- done
-
- # print the detailed tests durations if DDETAILS=true
- if $DDETAILS; then
- echo "$details"
- fi
+ for O in $DEFAULT_SUITES; do
+ O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]")
+ [ "${!O}" = "no" ] && continue || true
+ local o=$(echo $O | tr "[:upper:]_" "[:lower:]-")
+ local log=${TMP}/${o}.log
+
+ if is_sanity_benchmark $o; then
+ log=${TMP}/sanity-benchmark.log
+ fi
+ local slow=
+ local skipped=
+ local total=
+ local status=Unfinished
+
+ if [ -f $log ]; then
+ skipped=$(grep excluded $log |
+ awk '{ printf " %s", $3 }' | sed 's/test_//g')
+ slow=$(egrep "^PASS|^FAIL" $log |
+ tr -d "("| sed s/s\)$//g | sort -nr -k 3 |
+ head -n5 | awk '{ print $2":"$3"s" }')
+ total=$(grep duration $log | awk '{ print $2 }')
+ if [ "${!O}" = "done" ]; then
+ status=Done
+ fi
+ if $DDETAILS; then
+ local durations=$(egrep "^PASS|^FAIL" $log |
+ tr -d "("| sed s/s\)$//g |
+ awk '{ print $2":"$3"|" }')
+ details=$(printf "%s\n%s %s %s\n" "$details" \
+ "DDETAILS" "$O" "$(echo $durations)")
+ fi
+ fi
+ printf "$form" $status "$O" "${total}" "E=$skipped"
+ printf "$form" "-" "-" "-" "S=$(echo $slow)"
+ done
+
+ for O in $DEFAULT_SUITES; do
+ O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]")
+ if [ "${!O}" = "no" ]; then
+ printf "$form" "Skipped" "$O" ""
+ fi
+ done
+
+ # print the detailed tests durations if DDETAILS=true
+ if $DDETAILS; then
+ echo "$details"
+ fi
+}
+
+# Restart the whole Lustre setup (stop all targets, then set them up
+# again) when $do_reset is enabled; otherwise do nothing.
+reset_lustre() {
+	if $do_reset; then
+		stopall
+		setupall
+	fi
+}
+
+# Mount Lustre if it is not already mounted, unless setup was disabled
+# ($do_setup false) or we are in NFS client mode.  Sets
+# AUSTER_CLEANUP=true only when this function performed the setup
+# itself, so cleanup_if_needed() tears down only what we started.
+# Exits 1 if setup was attempted but $MOUNT still is not mounted.
+setup_if_needed() {
+	! ${do_setup} && return
+	nfs_client_mode && return
+	AUSTER_CLEANUP=false
+
+	local MOUNTED=$(mounted_lustre_filesystems)
+
+	# already mounted: just sanity-check the client configuration
+	if $(echo $MOUNTED' ' | grep -w -q $MOUNT' '); then
+		check_config_clients $MOUNT
+		# init_facets_vars
+		# init_param_vars
+		return
+	fi
+
+	echo "Lustre is not mounted, trying to do setup ... "
+	$reformat && CLEANUP_DM_DEV=true formatall
+	setupall
+
+	# verify the mount actually appeared; abort the whole run otherwise
+	MOUNTED=$(mounted_lustre_filesystems)
+	if ! $(echo $MOUNTED' ' | grep -w -q $MOUNT' '); then
+		echo "Lustre is not mounted after setup! "
+		exit 1
+	fi
+	AUSTER_CLEANUP=true
+}
+
+# Tear down Lustre, but only if setup_if_needed() mounted it
+# (i.e. AUSTER_CLEANUP was set to true).
+cleanup_if_needed() {
+	if $AUSTER_CLEANUP; then
+		cleanupall
+	fi
+}
+
+# Search a colon-separated directory list for a test script.
+# usage: find_script_in_path <target> <path>
+# Prints the first existing <dir>/<target> or <dir>/<target>.sh and
+# returns 0; returns 1 if no match is found.
+find_script_in_path() {
+	target=$1
+	path=$2
+	for dir in $(tr : " " <<< $path); do
+		if [ -f $dir/$target ]; then
+			echo $dir/$target
+			return 0
+		fi
+		if [ -f $dir/$target.sh ]; then
+			echo $dir/$target.sh
+			return 0
+		fi
+	done
+	return 1
+}
+
+# Log a timestamped banner line marking the start of a test suite.
+title() {
+	log "-----============= acceptance-small: "$*" ============----- `date`"
+}
+
+# Execute a command, honoring the global $dry_run and $verbose flags:
+# in dry-run mode only print what would have been executed; in verbose
+# mode echo the command before running it.
+doit() {
+	if $dry_run; then
+		printf "Would have run: %s\n" "$*"
+		return 0
+	fi
+	if $verbose; then
+		printf "Running: %s\n" "$*"
+	fi
+	"$@"
+}
+
+
+# Run a single test suite script and record its result.
+# usage: run_suite <suite_name> <suite_script>
+# Status is FAIL if the script exits non-zero or leaves a $TF_FAIL
+# marker file, SKIP if the $TF_SKIP marker survives, PASS otherwise.
+# Returns the suite script's exit code.
+run_suite() {
+	local suite_name=$1
+	local suite_script=$2
+
+	title $suite_name
+	log_test $suite_name
+
+	# reset the marker files the suite uses to signal its outcome
+	rm -f $TF_FAIL
+	touch $TF_SKIP
+
+	local start_ts=$(date +%s)
+
+	doit $script_lang $suite_script
+
+	local rc=$?
+	local duration=$(($(date +%s) - $start_ts))
+	local status="PASS"
+
+	if [[ $rc -ne 0 || -f $TF_FAIL ]]; then
+		status="FAIL"
+	elif [[ -f $TF_SKIP ]]; then
+		status="SKIP"
+	fi
+	log_test_status $duration $status
+	[[ ! -f $TF_SKIP ]] || rm -f $TF_SKIP
+
+	# optionally restart Lustre between suites ($do_reset)
+	reset_lustre
+
+	return $rc
+}
+
+# Locate a suite script under $LUSTRE/tests and run it with output
+# captured to $LOGDIR/<suite>.suite_log.<host>.log (tee'd to the
+# console when $verbose).  Returns run_suite's exit status, or 1 if
+# the script cannot be found.
+run_suite_logged() {
+	local suite_name=${1%.sh}
+	local suite=$(echo ${suite_name} | tr "[:lower:]-" "[:upper:]_")
+
+	suite_script=$(find_script_in_path $suite_name $LUSTRE/tests)
+
+	if [[ -z $suite_script ]]; then
+		echo "Can't find test script for $suite_name"
+		return 1
+	fi
+
+	echo "run_suite $suite_name $suite_script"
+
+	local log_name=${suite_name}.suite_log.$(hostname -s).log
+
+	if $verbose; then
+		run_suite $suite_name $suite_script 2>&1 |tee $LOGDIR/$log_name
+	else
+		run_suite $suite_name $suite_script > $LOGDIR/$log_name 2>&1
+	fi
+
+	# PIPESTATUS[0] is run_suite's status even when piped through tee
+	return ${PIPESTATUS[0]}
+}
+
+# Point test logging at a new directory and re-initialize it
+# (clearing any previous YAML log state).
+reset_logging() {
+	export LOGDIR=$1
+
+	unset YAML_LOG
+	init_logging
+}
+
+# Print the arguments with every comma replaced by a space.
+split_commas() {
+	echo "${*//,/ }"
+}
+
+# Run the listed test suites, repeating the whole list $repeat_count
+# times.  The argument list is a sequence of
+#   <suite> [--only ...] [--except ...] [--start-at ...] [--stop-at ...]
+#           [--suite ...] [--pattern ...] [--time-limit ...]
+# groups; comma-separated option values are split into space-separated
+# lists and exported for the suite to consume.  Logs go under
+# $test_logs_dir (with a per-iteration subdirectory when repeating) and
+# are optionally uploaded via $upload_script.
+run_suites() {
+	local n=0
+	local argv=("$@")
+
+	while ((n < repeat_count)); do
+		local RC=0
+		local logdir=${test_logs_dir}
+		local first_suite=$FIRST_SUITE
+
+		((repeat_count > 1)) && logdir="$logdir/$n"
+		reset_logging $logdir
+		# restore the original argument list for this iteration
+		set -- "${argv[@]}"
+		while [[ -n $1 ]]; do
+			unset ONLY EXCEPT START_AT STOP_AT
+			local opts=""
+			local time_limit=""
+
+			suite=$1
+			shift;
+			# consume this suite's options up to the next suite name
+			while [[ -n $1 ]]; do
+				case "$1" in
+					--only)
+						shift;
+						export ONLY=$(split_commas $1)
+
+						opts+="ONLY=$ONLY ";;
+					--suite)
+						shift;
+						export SUITE=$(split_commas $1)
+
+						opts+="SUITE=$SUITE ";;
+					--pattern)
+						shift;
+						export PATTERN=$(split_commas $1)
+
+						opts+="PATTERN=$PATTERN ";;
+					--except)
+						shift;
+						export EXCEPT=$(split_commas $1)
+
+						opts+="EXCEPT=$EXCEPT ";;
+					--start-at)
+						shift;
+						export START_AT=$1
+
+						opts+="START_AT=$START_AT ";;
+					--stop-at)
+						shift;
+						export STOP_AT=$1
+
+						opts+="STOP_AT=$STOP_AT ";;
+					--time-limit)
+						shift;
+						time_limit=$1;;
+					*)
+						break;;
+				esac
+				shift
+			done
+
+			# If first_suite not set or this is the first suite
+			if [ "x"$first_suite == "x" ] || [ $first_suite == $suite ]; then
+				echo "running: $suite $opts"
+				run_suite_logged $suite || RC=$?
+				unset first_suite
+				echo $suite returned $RC
+			fi
+		done
+		if $upload_logs; then
+			$upload_script $LOGDIR
+		fi
+		n=$((n + 1))
+	done
}
# Get information about the Lustre environment. The information collected
# output: No return values, environment variables are exported
get_lustre_env() {
-
- export mds1_FSTYPE=${mds1_FSTYPE:-$(facet_fstype mds1)}
- export ost1_FSTYPE=${ost1_FSTYPE:-$(facet_fstype ost1)}
-
- export MGS_VERSION=$(lustre_version_code mgs)
- export MDS1_VERSION=$(lustre_version_code mds1)
- export OST1_VERSION=$(lustre_version_code ost1)
- export CLIENT_VERSION=$(lustre_version_code client)
+ if ! $RPC_MODE; then
+ export mds1_FSTYPE=${mds1_FSTYPE:-$(facet_fstype mds1)}
+ export ost1_FSTYPE=${ost1_FSTYPE:-$(facet_fstype ost1)}
+
+ export MGS_VERSION=$(lustre_version_code mgs)
+ export MDS1_VERSION=$(lustre_version_code mds1)
+ export OST1_VERSION=$(lustre_version_code ost1)
+ export CLIENT_VERSION=$(lustre_version_code client)
+
+ # import server-side version information into local variables
+ # so they can be used in tests instead of checked separately
+ # MGS_OS_VERSION_ID, MGS_OS_ID, MGS_OS_ID_LIKE,
+ # MDS1_OS_VERSION_ID, MDS1_OS_ID, MDS1_OS_ID_LIKE,
+ # OST1_OS_VERSION_ID, OST1_OS_ID, OST1_OS_ID_LIKE,
+ # CLIENT_OS_VERSION_ID, CLIENT_OS_ID, CLIENT_OS_ID_LIKE
+ lustre_os_release "eval export" mgs
+ lustre_os_release "eval export" mds1
+ lustre_os_release "eval export" ost1
+ lustre_os_release "eval export" client
+ fi
# Prefer using "mds1" directly instead of SINGLEMDS.
# Keep this for compat until it is removed from scripts.
export KEEP_ZPOOL=${KEEP_ZPOOL:-false}
export CLEANUP_DM_DEV=false
export PAGE_SIZE=$(get_page_size client)
+ export NAME=${NAME:-local}
+
+ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
export MKE2FS=$MKE2FS
if [ -z "$MKE2FS" ]; then
fi
export LST=${LST:-"$LUSTRE/../lnet/utils/lst"}
[ ! -f "$LST" ] && export LST=$(which lst)
+ export LSTSH=${LSTSH:-"$LUSTRE/../lustre-iokit/lst-survey/lst.sh"}
+ [ ! -f "$LSTSH" ] && export LSTSH=$(which lst.sh)
export SGPDDSURVEY=${SGPDDSURVEY:-"$LUSTRE/../lustre-iokit/sgpdd-survey/sgpdd-survey")}
[ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey)
export MCREATE=${MCREATE:-mcreate}
export MULTIOP=${MULTIOP:-multiop}
+ export MMAP_CAT=${MMAP_CAT:-mmap_cat}
export STATX=${STATX:-statx}
# Ubuntu, at least, has a truncate command in /usr/bin
# so fully path our truncate command.
if ! echo $PATH | grep -q $LUSTRE/tests/mpi; then
export PATH=$LUSTRE/tests/mpi:$PATH
fi
- export RSYNC_RSH=${RSYNC_RSH:-rsh}
+ export LNETCTL=${LNETCTL:-"$LUSTRE/../lnet/utils/lnetctl"}
+ [ ! -f "$LNETCTL" ] && export LNETCTL=$(which lnetctl 2> /dev/null)
export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
[ ! -f "$LCTL" ] && export LCTL=$(which lctl)
export LFS=${LFS:-"$LUSTRE/utils/lfs"}
[ ! -f "$LFS" ] && export LFS=$(which lfs)
+ export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"}
+ [ ! -f "$KSOCKLND_CONFIG" ] &&
+ export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null)
- export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"}
+ export PERM_CMD=$(echo ${PERM_CMD:-"$LCTL conf_param"})
export L_GETIDENTITY=${L_GETIDENTITY:-"$LUSTRE/utils/l_getidentity"}
if [ ! -f "$L_GETIDENTITY" ]; then
- if `which l_getidentity > /dev/null 2>&1`; then
+ if $(which l_getidentity > /dev/null 2>&1); then
export L_GETIDENTITY=$(which l_getidentity)
else
export L_GETIDENTITY=NONE
fi
fi
export LL_DECODE_FILTER_FID=${LL_DECODE_FILTER_FID:-"$LUSTRE/utils/ll_decode_filter_fid"}
- [ ! -f "$LL_DECODE_FILTER_FID" ] && export LL_DECODE_FILTER_FID="ll_decode_filter_fid"
+ [ ! -f "$LL_DECODE_FILTER_FID" ] &&
+ export LL_DECODE_FILTER_FID="ll_decode_filter_fid"
export LL_DECODE_LINKEA=${LL_DECODE_LINKEA:-"$LUSTRE/utils/ll_decode_linkea"}
- [ ! -f "$LL_DECODE_LINKEA" ] && export LL_DECODE_LINKEA="ll_decode_linkea"
+ [ ! -f "$LL_DECODE_LINKEA" ] &&
+ export LL_DECODE_LINKEA="ll_decode_linkea"
export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"}
[ ! -f "$MKFS" ] && export MKFS="mkfs.lustre"
export TUNEFS=${TUNEFS:-"$LUSTRE/utils/tunefs.lustre"}
[ ! -f "$LSOM_SYNC" ] &&
export LSOM_SYNC=$(which llsom_sync 2> /dev/null)
[ -z "$LSOM_SYNC" ] && export LSOM_SYNC="/usr/sbin/llsom_sync"
- export NAME=${NAME:-local}
- export LGSSD=${LGSSD:-"$LUSTRE/utils/gss/lgssd"}
- [ "$GSS_PIPEFS" = "true" ] && [ ! -f "$LGSSD" ] &&
- export LGSSD=$(which lgssd)
+ export L_GETAUTH=${L_GETAUTH:-"$LUSTRE/utils/gss/l_getauth"}
+ [ ! -f "$L_GETAUTH" ] && export L_GETAUTH=$(which l_getauth 2> /dev/null)
export LSVCGSSD=${LSVCGSSD:-"$LUSTRE/utils/gss/lsvcgssd"}
[ ! -f "$LSVCGSSD" ] && export LSVCGSSD=$(which lsvcgssd 2> /dev/null)
export KRB5DIR=${KRB5DIR:-"/usr/kerberos"}
if $SHARED_KEY; then
$RPC_MODE || echo "Using GSS shared-key feature"
- which lgss_sk > /dev/null 2>&1 ||
+ [ -n "$LGSS_SK" ] ||
+ export LGSS_SK=$(which lgss_sk 2> /dev/null)
+ [ -n "$LGSS_SK" ] ||
+ export LGSS_SK="$LUSTRE/utils/gss/lgss_sk"
+ [ -n "$LGSS_SK" ] ||
error_exit "built with lgss_sk disabled! SEC=$SEC"
GSS=true
GSS_SK=true
;;
esac
- case "x$IDUP" in
- xtrue)
- IDENTITY_UPCALL=true
- ;;
- xfalse)
- IDENTITY_UPCALL=false
- ;;
- esac
-
export LOAD_MODULES_REMOTE=${LOAD_MODULES_REMOTE:-false}
# Paths on remote nodes, if different
# Constants used in more than one test script
export LOV_MAX_STRIPE_COUNT=2000
+ export LMV_MAX_STRIPES_PER_MDT=5
+ export DELETE_OLD_POOLS=${DELETE_OLD_POOLS:-false}
+ export KEEP_POOLS=${KEEP_POOLS:-false}
+ export PARALLEL=${PARALLEL:-"no"}
+ export BLCKSIZE=${BLCKSIZE:-4096}
export MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
- . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
get_lustre_env
# use localrecov to enable recovery for local clients, LU-12722
- [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] ||
+ [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] || {
export MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o localrecov"}
+ export MGS_MOUNT_OPTS=${MGS_MOUNT_OPTS:-"-o localrecov"}
+ }
+
[[ $OST1_VERSION -lt $(version_code 2.13.52) ]] ||
export OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o localrecov"}
}
# Return a numeric version code based on a version string. The version
# code is useful for comparison two version strings to see which is newer.
version_code() {
- # split arguments like "1.8.6-wc3" into "1", "8", "6", "wc3"
- eval set -- $(tr "[:punct:]" " " <<< $*)
+ # split arguments like "1.8.6-wc3" into "1", "8", "6", "3"
+ eval set -- $(tr "[:punct:][a-zA-Z]" " " <<< $*)
- echo -n $(((${1:-0} << 16) | (${2:-0} << 8) | ${3:-0}))
+ echo -n $(((${1:-0}<<24) | (${2:-0}<<16) | (${3:-0}<<8) | (${4:-0})))
}
export LINUX_VERSION=$(uname -r | sed -e "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/")
version_code $(lustre_build_version $1)
}
+# Extract the server-side /etc/os-release information into local variables
+# usage: lustre_os_release [action] <facet>
+#   action: command prefix applied to each matched KEY=VALUE line
+#           (default "echo"; callers pass "eval export" to import them)
+# generates $facet_OS_ID, $facet_OS_ID_LIKE, $facet_OS_VERSION_ID
+# and also $facet_OS_VERSION_CODE=$(version_code $facet_OS_VERSION_ID)
+lustre_os_release() {
+	local action=${1:-echo}
+	local facet=$2
+	local FACET_OS=$(tr "[:lower:]" "[:upper:]" <<<$facet)_OS_
+
+	[[ "$action" == "echo" ]] &&
+		echo "$facet: $(do_facet $facet "cat /etc/system-release")"
+	do_facet $facet "[[ -r /etc/os-release ]] || ls -s /etc/*release" 1>&2
+
+	# pick out the interesting keys and hand each to $action,
+	# prefixed with e.g. MDS1_OS_
+	while read LINE; do
+		case $LINE in
+		VERSION_ID=*|ID=*|ID_LIKE=*) $action ${FACET_OS}$LINE ;;
+		esac
+	done < <(do_facet $facet "cat /etc/os-release")
+
+	[[ "$action" == "echo" ]] && return 0
+
+	# defer version_code evaluation to the caller's eval
+	local facet_version=${FACET_OS}VERSION
+	$action ${facet_version}_CODE=\$\(version_code \$${facet_version}_ID\)
+}
+
module_loaded () {
/sbin/lsmod | grep -q "^\<$1\>"
}
+check_lfs_df_ret_val() {
+	# Ignore only EOPNOTSUPP (which is 95; Operation not supported),
+	# returned by 'lfs df' for a valid dentry that is not a lustrefs.
+	#
+	# 'lfs df' historically always returned success(0) instead of
+	# EOPNOTSUPP. For compatibility reasons, this function ignores
+	# EOPNOTSUPP and masquerades it as success.
+	[[ $1 -eq 95 ]] && return 0
+	return $1
+}
+
PRLFS=false
lustre_insmod() {
local module=$1
fi
}
-load_modules_local() {
- if [ -n "$MODPROBE" ]; then
- # use modprobe
- echo "Using modprobe to load modules"
- return 0
- fi
-
- # Create special udev test rules on every node
- if [ -f $LUSTRE/lustre/conf/99-lustre.rules ]; then {
- sed -e 's|/usr/sbin/lctl|$LCTL|g' $LUSTRE/lustre/conf/99-lustre.rules > /etc/udev/rules.d/99-lustre-test.rules
- } else {
- echo "SUBSYSTEM==\"lustre\", ACTION==\"change\", ENV{PARAM}==\"?*\", RUN+=\"$LCTL set_param '\$env{PARAM}=\$env{SETTING}'\"" > /etc/udev/rules.d/99-lustre-test.rules
- } fi
- udevadm control --reload-rules
- udevadm trigger
+# Run an lnetctl command, first recording it in the Lustre debug log
+# ($LCTL mark) and echoing it to the console.
+do_lnetctl() {
+	$LCTL mark "$LNETCTL $*"
+	echo "$LNETCTL $*"
+	$LNETCTL "$@"
+}
+load_lnet() {
# For kmemleak-enabled kernels we need clear all past state
# that obviously has nothing to do with this Lustre run
# Disable automatic memory scanning to avoid perf hit.
if [ -f /sys/kernel/debug/kmemleak ] ; then
- echo scan=off > /sys/kernel/debug/kmemleak
- echo scan > /sys/kernel/debug/kmemleak
- echo clear > /sys/kernel/debug/kmemleak
+ echo scan=off > /sys/kernel/debug/kmemleak || true
+ echo scan > /sys/kernel/debug/kmemleak || true
+ echo clear > /sys/kernel/debug/kmemleak || true
fi
echo Loading modules from $LUSTRE
else
ncpus=$(getconf _NPROCESSORS_CONF 2>/dev/null)
local rc=$?
+
if [ $rc -eq 0 ]; then
echo "detected $ncpus online CPUs by getconf"
else
# partitions. So we just force libcfs to create 2 partitions for
# system with 2 or 4 cores
local saved_opts="$MODOPTS_LIBCFS"
+
if [ $ncpus -le 4 ] && [ $ncpus -gt 1 ]; then
# force to enable multiple CPU partitions
echo "Force libcfs to create 2 CPU partitions"
load_module ../libcfs/libcfs/libcfs
# Prevent local MODOPTS_LIBCFS being passed as part of environment
# variable to remote nodes
- MODOPTS_LIBCFS=$saved_opts
+ unset MODOPTS_LIBCFS
- set_default_debug
- load_module ../lnet/lnet/lnet
+ set_default_debug "neterror net nettrace malloc"
+ if [ "$1" = "config_on_load=1" ]; then
+ load_module ../lnet/lnet/lnet
+ else
+ load_module ../lnet/lnet/lnet "$@"
+ fi
LNDPATH=${LNDPATH:-"../lnet/klnds"}
if [ -z "$LNETLND" ]; then
case $NETTYPE in
- o2ib*) LNETLND="o2iblnd/ko2iblnd" ;;
- tcp*) LNETLND="socklnd/ksocklnd" ;;
- *) local lnd="${NETTYPE%%[0-9]}lnd"
+ o2ib*) LNETLND="o2iblnd/ko2iblnd" ;;
+ tcp*) LNETLND="socklnd/ksocklnd" ;;
+ kfi*) LNETLND="kfilnd/kkfilnd" ;;
+ gni*) LNETLND="gnilnd/kgnilnd" ;;
+ *) local lnd="${NETTYPE%%[0-9]}lnd"
[ -f "$LNDPATH/$lnd/k$lnd.ko" ] &&
LNETLND="$lnd/k$lnd" ||
LNETLND="socklnd/ksocklnd"
esac
fi
load_module ../lnet/klnds/$LNETLND
+
+ if [ "$1" = "config_on_load=1" ]; then
+ do_lnetctl lnet configure --all ||
+ return $?
+ fi
+}
+
+load_modules_local() {
+ if [ -n "$MODPROBE" ]; then
+ # use modprobe
+ echo "Using modprobe to load modules"
+ return 0
+ fi
+
+ # Create special udev test rules on every node
+ if [ -f $LUSTRE/lustre/conf/99-lustre.rules ]; then {
+ sed -e 's|/usr/sbin/lctl|$LCTL|g' $LUSTRE/lustre/conf/99-lustre.rules > /etc/udev/rules.d/99-lustre-test.rules
+ } else {
+ echo "SUBSYSTEM==\"lustre\", ACTION==\"change\", ENV{PARAM}==\"?*\", RUN+=\"$LCTL set_param '\$env{PARAM}=\$env{SETTING}'\"" > /etc/udev/rules.d/99-lustre-test.rules
+ } fi
+ udevadm control --reload-rules
+ udevadm trigger
+
+ load_lnet
+
load_module obdclass/obdclass
+ if ! client_only; then
+ MODOPTS_PTLRPC=${MODOPTS_PTLRPC:-"lbug_on_grant_miscount=1"}
+ fi
load_module ptlrpc/ptlrpc
load_module ptlrpc/gss/ptlrpc_gss
load_module fld/fld
load_module fid/fid
load_module lmv/lmv
load_module osc/osc
- load_module mdc/mdc
load_module lov/lov
+ load_module mdc/mdc
load_module mgc/mgc
load_module obdecho/obdecho
if ! client_only; then
load_module mgs/mgs
load_module mdd/mdd
load_module mdt/mdt
- load_module ost/ost
+ # don't fail if ost module doesn't exist
+ load_module ost/ost 2>/dev/null || true;
load_module lod/lod
- load_module osp/osp
load_module ofd/ofd
load_module osp/osp
fi
[ ! -f "$sbin_mount" ] && touch "$sbin_mount"
if [ ! -s "$sbin_mount" -a -w "$sbin_mount" ]; then
cat <<- EOF > "$sbin_mount"
- #!/bin/sh
+ #!/bin/bash
#STUB MARK
echo "This $sbin_mount just a mountpoint." 1>&2
echo "It is never supposed to be run." 1>&2
}
load_modules () {
+ local facets
+ local facet
+ local failover
load_modules_local
# bug 19124
# load modules on remote nodes optionally
# lustre-tests have to be installed on these nodes
if $LOAD_MODULES_REMOTE; then
local list=$(comma_list $(remote_nodes_list))
+
+ # include failover nodes in case they are not in the list yet
+ facets=$(get_facets)
+ for facet in ${facets//,/ }; do
+ failover=$(facet_failover_host $facet)
+ [ -n "$list" ] && [[ ! "$list" =~ "$failover" ]] &&
+ list="$list,$failover"
+ done
+
if [ -n "$list" ]; then
echo "loading modules on: '$list'"
do_rpc_nodes "$list" load_modules_local
}
check_mem_leak () {
- LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true)
- LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true)
- if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
- echo "$LEAK_LUSTRE" 1>&2
- echo "$LEAK_PORTALS" 1>&2
- mv $TMP/debug $TMP/debug-leak.`date +%s` || true
- echo "Memory leaks detected"
- [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true
- return 1
- fi
+ LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true)
+ LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" ||
+ true)
+ if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
+ echo "$LEAK_LUSTRE" 1>&2
+ echo "$LEAK_PORTALS" 1>&2
+ mv $TMP/debug $TMP/debug-leak.`date +%s` || true
+ echo "Memory leaks detected"
+ [ -n "$IGNORE_LEAK" ] &&
+ { echo "ignoring leaks" && return 0; } || true
+ return 1
+ fi
}
-unload_modules() {
- wait_exit_ST client # bug 12845
-
+unload_modules_local() {
$LUSTRE_RMMOD ldiskfs || return 2
[ -f /etc/udev/rules.d/99-lustre-test.rules ] &&
udevadm control --reload-rules
udevadm trigger
+ check_mem_leak || return 254
+
+ return 0
+}
+
+unload_modules() {
+ local rc=0
+
+ wait_exit_ST client # bug 12845
+
+ unload_modules_local || rc=$?
+
if $LOAD_MODULES_REMOTE; then
local list=$(comma_list $(remote_nodes_list))
if [ -n "$list" ]; then
echo "unloading modules on: '$list'"
- do_rpc_nodes "$list" $LUSTRE_RMMOD ldiskfs
- do_rpc_nodes "$list" check_mem_leak
- do_rpc_nodes "$list" "rm -f /etc/udev/rules.d/99-lustre-test.rules"
- do_rpc_nodes "$list" "udevadm control --reload-rules"
- do_rpc_nodes "$list" "udevadm trigger"
+ do_rpc_nodes "$list" unload_modules_local
fi
fi
rm -f $sbin_mount
fi
- check_mem_leak || return 254
+ [[ $rc -eq 0 ]] && echo "modules unloaded."
- echo "modules unloaded."
- return 0
+ return $rc
}
fs_log_size() {
- local facet=${1:-$SINGLEMDS}
+ local facet=${1:-ost1}
local size=0
+ local mult=$OSTCOUNT
case $(facet_fstype $facet) in
- ldiskfs) size=50;; # largest seen is 44, leave some headroom
+ ldiskfs) size=32;; # largest seen is 64 with multiple OSTs
# grant_block_size is in bytes, allow at least 2x max blocksize
zfs) size=$(lctl get_param osc.$FSNAME*.import |
awk '/grant_block_size:/ {print $2/512; exit;}')
;;
esac
- echo -n $((size * MDSCOUNT))
+ [[ $facet =~ mds ]] && mult=$MDTCOUNT
+ echo -n $((size * mult))
}
fs_inode_ksize() {
echo -n $size
}
+# Run a command as another user via "su -".
+# usage: runas_su <user> <cmd> [args...]
+# The command is resolved to a full path with which(1) before
+# switching user.
+runas_su() {
+	local user=$1
+	local cmd=$2
+	shift 2
+	local opts="$*"
+
+	if $VERBOSE; then
+		echo Running as $user: $cmd $opts
+	fi
+	cmd=$(which $cmd)
+	su - $user -c "$cmd $opts"
+}
+
check_gss_daemon_nodes() {
- local list=$1
- dname=$2
+ local list=$1
+ local dname=$2
+ local loopmax=10
+ local loop
+ local node
+ local ret
- do_nodesv $list "num=\\\$(ps -o cmd -C $dname | grep $dname | wc -l);
+ dname=$(basename "$dname" | awk '{print $1}')
+ do_nodesv $list "num=0;
+for proc in \\\$(pgrep $dname); do
+[ \\\$(ps -o ppid= -p \\\$proc) -ne 1 ] || ((num++))
+done;
if [ \\\"\\\$num\\\" -ne 1 ]; then
echo \\\$num instance of $dname;
exit 1;
fi; "
+ ret=$?
+ (( $ret == 0 )) || return $ret
+
+ for node in ${list//,/ }; do
+ loop=0
+ while (( $loop < $loopmax )); do
+ do_nodesv $node "$L_GETAUTH -d"
+ ret=$?
+ (( $ret == 0 )) && break
+ loop=$((loop + 1))
+ sleep 5
+ done
+ (( $loop < $loopmax )) || return 1
+ done
+ return 0
}
check_gss_daemon_facet() {
- facet=$1
- dname=$2
+ local facet=$1
+ local dname=$2
- num=`do_facet $facet ps -o cmd -C $dname | grep $dname | wc -l`
- if [ $num -ne 1 ]; then
- echo "$num instance of $dname on $facet"
- return 1
- fi
- return 0
+ dname=$(basename "$dname" | awk '{print $1}')
+ num=$(do_facet $facet ps -o cmd -C $dname | grep $dname | wc -l)
+ if [ $num -ne 1 ]; then
+ echo "$num instance of $dname on $facet"
+ return 1
+ fi
+ return 0
}
send_sigint() {
- local list=$1
- shift
- echo Stopping $@ on $list
- do_nodes $list "killall -2 $@ 2>/dev/null || true"
+ local list=$1
+
+ shift
+ echo "Stopping "$@" on $list"
+ do_nodes $list "killall -2 $* 2>/dev/null || true"
}
# start gss daemons on all nodes, or "daemon" on "nodes" if set
start_gss_daemons() {
local nodes=$1
local daemon=$2
+ local options=$3
if [ "$nodes" ] && [ "$daemon" ] ; then
echo "Starting gss daemon on nodes: $nodes"
- do_nodes $nodes "$daemon" || return 8
+ do_nodes $nodes "$daemon" "$options" || return 8
+ check_gss_daemon_nodes $nodes "$daemon" || return 9
return 0
fi
echo "Starting gss daemon on mds: $nodes"
if $GSS_SK; then
# Start all versions, in case of switching
- do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z" || return 1
+ do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z $options" ||
+ return 1
else
- do_nodes $nodes "$LSVCGSSD -v" || return 1
- fi
- if $GSS_PIPEFS; then
- do_nodes $nodes "$LGSSD -v" || return 2
+ do_nodes $nodes "$LSVCGSSD -vvv $options" || return 1
fi
nodes=$(comma_list $(osts_nodes))
echo "Starting gss daemon on ost: $nodes"
if $GSS_SK; then
# Start all versions, in case of switching
- do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z" || return 3
+ do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z $options" ||
+ return 3
else
- do_nodes $nodes "$LSVCGSSD -v" || return 3
+ do_nodes $nodes "$LSVCGSSD -vvv $options" || return 3
fi
# starting on clients
local clients=${CLIENTS:-$HOSTNAME}
- if $GSS_PIPEFS; then
- echo "Starting $LGSSD on clients $clients "
- do_nodes $clients "$LGSSD -v" || return 4
- fi
-
- # wait daemons entering "stable" status
- sleep 5
#
# check daemons are running
#
nodes=$(comma_list $(mdts_nodes) $(osts_nodes))
- check_gss_daemon_nodes $nodes lsvcgssd || return 5
- if $GSS_PIPEFS; then
- nodes=$(comma_list $(mdts_nodes))
- check_gss_daemon_nodes $nodes lgssd || return 6
- fi
- if $GSS_PIPEFS; then
- check_gss_daemon_nodes $clients lgssd || return 7
- fi
+ check_gss_daemon_nodes $nodes "$LSVCGSSD" || return 5
}
stop_gss_daemons() {
add_sk_mntflag() {
# Add mount flags for shared key
local mt_opts=$@
+
if grep -q skpath <<< "$mt_opts" ; then
mt_opts=$(echo $mt_opts |
sed -e "s#skpath=[^ ,]*#skpath=$SK_PATH#")
start_gss_daemons || error_exit "start gss daemon failed! rc=$?"
fi
+ if $GSS_SK && ! $SK_NO_KEY; then
+ echo "Loading basic SSK keys on all servers"
+ do_nodes $(comma_list $(all_server_nodes)) \
+ "$LGSS_SK -t server -l $SK_PATH/$FSNAME.key || true"
+ do_nodes $(comma_list $(all_server_nodes)) \
+ "keyctl show | grep lustre | cut -c1-11 |
+ sed -e 's/ //g;' |
+ xargs -IX keyctl setperm X 0x3f3f3f3f"
+ fi
+
if $GSS_SK && $SK_NO_KEY; then
local numclients=${1:-$CLIENTCOUNT}
local clients=${CLIENTS:-$HOSTNAME}
# and S2S now requires keys as well, both for "client"
# and for "server"
if $SK_S2S; then
- lgss_sk -t server -f$FSNAME -n $SK_S2SNMCLI \
+ $LGSS_SK -t server -f$FSNAME -n $SK_S2SNMCLI \
-w $SK_PATH/$FSNAME-nmclient.key \
-d /dev/urandom >/dev/null 2>&1
- lgss_sk -t mgs,server -f$FSNAME -n $SK_S2SNM \
+ $LGSS_SK -t mgs,server -f$FSNAME -n $SK_S2SNM \
-w $SK_PATH/$FSNAME-s2s-server.key \
-d /dev/urandom >/dev/null 2>&1
fi
# basic key create
- lgss_sk -t server -f$FSNAME -w $SK_PATH/$FSNAME.key \
+ $LGSS_SK -t server -f$FSNAME -w $SK_PATH/$FSNAME.key \
-d /dev/urandom >/dev/null 2>&1
# per-nodemap keys
for i in $(seq 0 $((numclients - 1))); do
- lgss_sk -t server -f$FSNAME -n c$i \
+ $LGSS_SK -t server -f$FSNAME -n c$i \
-w $SK_PATH/nodemap/c$i.key -d /dev/urandom \
>/dev/null 2>&1
done
fi
# Set client keys to client type to generate prime P
if local_mode; then
- do_nodes $(all_nodes) "lgss_sk -t client,server -m \
+ do_nodes $(all_nodes) "$LGSS_SK -t client,server -m \
$SK_PATH/$FSNAME.key >/dev/null 2>&1"
else
- do_nodes $clients "lgss_sk -t client -m \
+ do_nodes $clients "$LGSS_SK -t client -m \
$SK_PATH/$FSNAME.key >/dev/null 2>&1"
- do_nodes $clients "find $SK_PATH/nodemap -name \*.key | \
- xargs -IX lgss_sk -t client -m X >/dev/null 2>&1"
+ do_nodes $clients "find $SK_PATH/nodemap \
+ -name \*.key | xargs -IX $LGSS_SK -t client \
+ -m X >/dev/null 2>&1"
+ # also have a client key available on server side,
+ # for local client mount
+ do_nodes $(comma_list $(all_server_nodes)) \
+ "cp $SK_PATH/$FSNAME.key $SK_PATH/${FSNAME}_cli.key && \
+ $LGSS_SK -t client -m \
+ $SK_PATH/${FSNAME}_cli.key >/dev/null 2>&1"
fi
# This is required for servers as well, if S2S in use
if $SK_S2S; then
do_nodes $(comma_list $(mdts_nodes)) \
"cp $SK_PATH/$FSNAME-s2s-server.key \
- $SK_PATH/$FSNAME-s2s-client.key; lgss_sk \
+ $SK_PATH/$FSNAME-s2s-client.key; $LGSS_SK \
-t client -m $SK_PATH/$FSNAME-s2s-client.key \
>/dev/null 2>&1"
do_nodes $(comma_list $(osts_nodes)) \
"cp $SK_PATH/$FSNAME-s2s-server.key \
- $SK_PATH/$FSNAME-s2s-client.key; lgss_sk \
+ $SK_PATH/$FSNAME-s2s-client.key; $LGSS_SK \
-t client -m $SK_PATH/$FSNAME-s2s-client.key \
>/dev/null 2>&1"
- do_nodes $clients "lgss_sk -t client \
+ do_nodes $clients "$LGSS_SK -t client \
-m $SK_PATH/$FSNAME-nmclient.key \
>/dev/null 2>&1"
fi
lctl set_param -n \
sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG"
fi
+
+ do_nodesv $(comma_list $(all_server_nodes)) \
+ "$LCTL set_param sptlrpc.gss.rsi_upcall=$L_GETAUTH"
}
cleanup_gss() {
local facet
for facet in ${facets//,/ }; do
- if [ $node == $(facet_host $facet) ] ||
- [ $node == "$(facet_failover_host $facet)" ]; then
+ if [[ $node == $(facet_host $facet) ]] ||
+ [[ $node == "$(facet_failover_host $facet)" ]]; then
fstype=$(facet_fstype $facet)
if [[ $fstypes != *$fstype* ]]; then
fstypes+="${fstypes:+,}$fstype"
echo -n $label
}
-mdsdevlabel() {
- local num=$1
- local device=$(mdsdevname $num)
- local label=$(devicelabel mds$num ${device} | grep -v "CMD: ")
- echo -n $label
-}
-
-ostdevlabel() {
- local num=$1
- local device=$(ostdevname $num)
- local label=$(devicelabel ost$num ${device} | grep -v "CMD: ")
- echo -n $label
-}
-
#
# Get the device of a facet.
#
case $virt in
VMware|KVM|VirtualBox|Parallels|Bochs)
- echo $virt | tr '[A-Z]' '[a-z]' ;;
+ echo $virt | tr '[A-Z]' '[a-z]' && return;;
+
+ *) ;;
+ esac
+
+ virt=$(dmidecode -s system-manufacturer | awk '{print $1}')
+ case $virt in
+ QEMU)
+ echo $virt | tr '[A-Z]' '[a-z]' && return;;
*) ;;
esac
}
local device=${2:-$FSNAME-OST*}
local name=$3
- do_nodes $nodes "$LCTL get_param -n obdfilter.$device.$name \
- osd-*.$device.$name 2>&1" | grep -v 'error:'
+ do_nodes $nodes "$LCTL get_param -n osd-*.$device.$name"
}
set_osd_param() {
local name=$3
local value=$4
- do_nodes $nodes "$LCTL set_param -n obdfilter.$device.$name=$value \
- osd-*.$device.$name=$value 2>&1" | grep -v 'error:'
-}
-
-set_debug_size () {
- local dz=${1:-$DEBUG_SIZE}
-
- if [ -f /sys/devices/system/cpu/possible ]; then
- local cpus=$(($(cut -d "-" -f 2 /sys/devices/system/cpu/possible)+1))
- else
- local cpus=$(getconf _NPROCESSORS_CONF 2>/dev/null)
- fi
-
- # bug 19944, adjust size to be -gt num_possible_cpus()
- # promise 2MB for every cpu at least
- if [ -n "$cpus" ] && [ $((cpus * 2)) -gt $dz ]; then
- dz=$((cpus * 2))
- fi
- lctl set_param debug_mb=$dz
+ do_nodes $nodes "$LCTL set_param -n osd-*.$device.$name=$value"
}
set_default_debug () {
local debug_size=${3:-$DEBUG_SIZE}
[ -n "$debug" ] && lctl set_param debug="$debug" >/dev/null
- [ -n "$subsys" ] && lctl set_param subsystem_debug="${subsys# }" >/dev/null
+ [ -n "$subsys" ] &&
+ lctl set_param subsystem_debug="${subsys# }" >/dev/null
+ [ -n "$debug_size" ] &&
+ lctl set_param debug_mb="$debug_size" >/dev/null
- [ -n "$debug_size" ] && set_debug_size $debug_size > /dev/null
+ return 0
}
set_default_debug_nodes () {
set_default_debug_nodes $node "$debug" "$subsys" $debug_size
}
-set_hostid () {
- local hostid=${1:-$(hostid)}
+set_params_nodes() {
+ local nodes=$1
+ shift
+ local params="$@"
- if [ ! -s /etc/hostid ]; then
- printf $(echo -n $hostid |
- sed 's/\(..\)\(..\)\(..\)\(..\)/\\x\4\\x\3\\x\2\\x\1/') >/etc/hostid
- fi
+ [[ -n "$params" ]] || return 0
+
+ do_nodes $nodes "$LCTL set_param $params"
}
-# Facet functions
-mount_facets () {
- local facets=${1:-$(get_facets)}
- local facet
+set_params_clients() {
+ (( $# >= 2 )) || return 0
+ local clients=${1:-$CLIENTS}
+ shift
+ local params="${@:-$CLIENT_LCTL_SETPARAM_PARAM}"
- for facet in ${facets//,/ }; do
- mount_facet $facet
- local RC=$?
- [ $RC -eq 0 ] && continue
+ set_params_nodes $clients $params
+}
- if [ "$TESTSUITE.$TESTNAME" = "replay-dual.test_0a" ]; then
+set_params_mdts() {
+ (( $# >= 2 )) || return 0
+ local mdts=${1:-$(comma_list $(mdts_nodes))}
+ shift
+ local params="${@:-$MDS_LCTL_SETPARAM_PARAM}"
+
+ set_params_nodes $mdts $params
+}
+
+set_params_osts() {
+ (( $# >= 2 )) || return 0
+ local osts=${1:-$(comma_list $(osts_nodes))}
+ shift
+ local params="${@:-$OSS_LCTL_SETPARAM_PARAM}"
+
+ set_params_nodes $osts $params
+}
+
+set_hostid () {
+ local hostid=${1:-$(hostid)}
+
+ if [ ! -s /etc/hostid ]; then
+ printf $(echo -n $hostid |
+ sed 's/\(..\)\(..\)\(..\)\(..\)/\\x\4\\x\3\\x\2\\x\1/') >/etc/hostid
+ fi
+}
+
+# Facet functions
+mount_facets () {
+ local facets=${1:-$(get_facets)}
+ local facet
+ local -a mountpids
+ local total=0
+ local ret=0
+
+ for facet in ${facets//,/ }; do
+ mount_facet $facet &
+ mountpids[total]=$!
+ total=$((total+1))
+ done
+ for ((index=0; index<$total; index++)); do
+ wait ${mountpids[index]}
+ local RC=$?
+ [ $RC -eq 0 ] && continue
+
+ if [ "$TESTSUITE.$TESTNAME" = "replay-dual.test_0a" ]; then
skip_noexit "Restart of $facet failed!." &&
touch $LU482_FAILED
else
error "Restart of $facet failed!"
fi
- return $RC
+ ret=$RC
done
+ return $ret
}
#
local devicelabel
local dm_dev=${!dev}
+ [[ $dev == "mgsfailover_dev" ]] && combined_mgs_mds &&
+ dev=mds1failover_dev
+
module_loaded lustre || load_modules
case $fstype in
if [ -f $TMP/test-lu482-trigger ]; then
RC=2
else
+ local seq_width=$(($OSTSEQWIDTH / $OSTCOUNT))
+ (( $seq_width >= 16384 )) || seq_width=16384
do_facet ${facet} \
"mkdir -p $mntpt; $MOUNT_CMD $opts $dm_dev $mntpt"
RC=${PIPESTATUS[0]}
+ if [[ ${facet} =~ ost ]]; then
+ do_facet ${facet} "$LCTL set_param \
+ seq.cli-$(devicelabel $facet $dm_dev)-super.width=$seq_width"
+ fi
fi
if [ $RC -ne 0 ]; then
local dev_alias=$(facet_device_alias $facet)
eval export ${dev_alias}_dev=${device}
- eval export ${facet}_opt=\"$@\"
+ eval export ${facet}_opt=\"$*\"
+
+ combined_mgs_mds && [[ ${dev_alias} == mds1 ]] &&
+ eval export mgs_dev=${device}
local varname=${dev_alias}failover_dev
if [ -n "${!varname}" ] ; then
eval export ${dev_alias}failover_dev=${!varname}
else
eval export ${dev_alias}failover_dev=$device
+ combined_mgs_mds && [[ ${dev_alias} == mds1 ]] &&
+ eval export mgsfailover_dev=${device}
+
fi
local mntpt=$(facet_mntpt $facet)
local mntpt=$(facet_mntpt $facet)
running=$(do_facet ${facet} "grep -c $mntpt' ' /proc/mounts || true")
if [ ${running} -ne 0 ]; then
- echo "Stopping $mntpt (opts:$@) on $HOST"
- do_facet ${facet} $UMOUNT $@ $mntpt
+ echo "Stopping $mntpt (opts:$*) on $HOST"
+ do_facet ${facet} $UMOUNT "$@" $mntpt
fi
# umount should block, but we should wait for unrelated obd's
# restore old quota type settings
restore_quota() {
+ for usr in $QUOTA_USERS; do
+ echo "Setting up quota on $HOSTNAME:$MOUNT for $usr..."
+ for type in u g; do
+ cmd="$LFS setquota -$type $usr -b 0"
+ cmd="$cmd -B 0 -i 0 -I 0 $MOUNT"
+ echo "+ $cmd"
+ eval $cmd || error "$cmd FAILED!"
+ done
+ # display the quota status
+ echo "Quota settings for $usr : "
+ $LFS quota -v -u $usr $MOUNT || true
+ done
if [ "$old_MDT_QUOTA_TYPE" ]; then
if [[ $PERM_CMD == *"set_param -P"* ]]; then
do_facet mgs $PERM_CMD \
- osd-*.$FSNAME-MDT*.quota_slave.enable = \
+ osd-*.$FSNAME-MDT*.quota_slave.enabled = \
$old_MDT_QUOTA_TYPE
else
do_facet mgs $PERM_CMD \
if [ "$old_OST_QUOTA_TYPE" ]; then
if [[ $PERM_CMD == *"set_param -P"* ]]; then
do_facet mgs $PERM_CMD \
- osd-*.$FSNAME-OST*.quota_slave.enable = \
+ osd-*.$FSNAME-OST*.quota_slave.enabled = \
$old_OST_QUOTA_TYPE
else
do_facet mgs $LCTL conf_param \
# This will allow fixing the "lfs df" summary line in the future.
lfs_df() {
$LFS df $* | sed -e 's/filesystem /filesystem_/'
+ check_lfs_df_ret_val ${PIPESTATUS[0]}
}
# Get free inodes on the MDT specified by mdt index, free indoes on
if [[ $PERM_CMD == *"set_param -P"* ]]; then
do_facet mgs $PERM_CMD \
- osd-*.$FSNAME-MDT*.quota_slave.enable=$QUOTA_TYPE
+ osd-*.$FSNAME-MDT*.quota_slave.enabled=$QUOTA_TYPE
do_facet mgs $PERM_CMD \
- osd-*.$FSNAME-OST*.quota_slave.enable=$QUOTA_TYPE
+ osd-*.$FSNAME-OST*.quota_slave.enabled=$QUOTA_TYPE
else
do_facet mgs $PERM_CMD $FSNAME.quota.mdt=$QUOTA_TYPE ||
error "set mdt quota type failed"
fi
set_default_debug_nodes $client
+ set_params_clients $client
return 0
}
# Mount the file system on the MDS
mount_mds_client() {
- local mds_HOST=${SINGLEMDS}_HOST
- echo $mds_HOST
- zconf_mount $mds1_HOST $MOUNT2 $MOUNT_OPTS ||
- error "unable to mount $MOUNT2 on MDS"
+ local host=$(facet_active_host $SINGLEMDS)
+ echo $host
+ zconf_mount $host $MOUNT2 $MOUNT_OPTS ||
+ error "unable to mount $MOUNT2 on $host"
}
# Unmount the file system on the MDS
umount_mds_client() {
- local mds_HOST=${SINGLEMDS}_HOST
- zconf_umount $mds1_HOST $MOUNT2
+ local host=$(facet_active_host $SINGLEMDS)
+ zconf_umount $host $MOUNT2
do_facet $SINGLEMDS "rmdir $MOUNT2"
}
# nodes is comma list
sanity_mount_check_nodes () {
- local nodes=$1
- shift
- local mnts="$@"
- local mnt
+ local nodes=$1
+ shift
+ local mnts="$@"
+ local mnt
- # FIXME: assume that all cluster nodes run the same os
- [ "$(uname)" = Linux ] || return 0
+ # FIXME: assume that all cluster nodes run the same os
+ [ "$(uname)" = Linux ] || return 0
- local rc=0
- for mnt in $mnts ; do
- do_nodes $nodes "running=\\\$(grep -c $mnt' ' /proc/mounts);
+ local rc=0
+ for mnt in $mnts ; do
+ do_nodes $nodes "running=\\\$(grep -c $mnt' ' /proc/mounts);
mpts=\\\$(mount | grep -c $mnt' ');
if [ \\\$running -ne \\\$mpts ]; then
echo \\\$(hostname) env are INSANE!;
exit 1;
fi"
- [ $? -eq 0 ] || rc=1
- done
- return $rc
+ [ $? -eq 0 ] || rc=1
+ done
+ return $rc
}
sanity_mount_check_servers () {
- [ -n "$CLIENTONLY" ] &&
- { echo "CLIENTONLY mode, skip mount_check_servers"; return 0; } || true
- echo Checking servers environments
-
- # FIXME: modify get_facets to display all facets wo params
- local facets="$(get_facets OST),$(get_facets MDS),mgs"
- local node
- local mntpt
- local facet
- for facet in ${facets//,/ }; do
- node=$(facet_host ${facet})
- mntpt=$(facet_mntpt $facet)
- sanity_mount_check_nodes $node $mntpt ||
- { error "server $node environments are insane!"; return 1; }
- done
+ [ -n "$CLIENTONLY" ] &&
+ { echo "CLIENTONLY mode, skip mount_check_servers"; return 0; } || true
+ echo Checking servers environments
+
+ # FIXME: modify get_facets to display all facets wo params
+ local facets="$(get_facets OST),$(get_facets MDS),mgs"
+ local node
+ local mntpt
+ local facet
+ for facet in ${facets//,/ }; do
+ node=$(facet_host ${facet})
+ mntpt=$(facet_mntpt $facet)
+ sanity_mount_check_nodes $node $mntpt ||
+ { error "server $node environments are insane!"; return 1; }
+ done
}
sanity_mount_check_clients () {
- local clients=${1:-$CLIENTS}
- local mntpt=${2:-$MOUNT}
- local mntpt2=${3:-$MOUNT2}
+ local clients=${1:-$CLIENTS}
+ local mntpt=${2:-$MOUNT}
+ local mntpt2=${3:-$MOUNT2}
- [ -z $clients ] && clients=$(hostname)
- echo Checking clients $clients environments
+ [ -z $clients ] && clients=$(hostname)
+ echo Checking clients $clients environments
- sanity_mount_check_nodes $clients $mntpt $mntpt2 ||
- error "clients environments are insane!"
+ sanity_mount_check_nodes $clients $mntpt $mntpt2 ||
+ error "clients environments are insane!"
}
sanity_mount_check () {
- sanity_mount_check_servers || return 1
- sanity_mount_check_clients || return 2
+ sanity_mount_check_servers || return 1
+ sanity_mount_check_clients || return 2
}
# mount clients if not mouted
local i=0
# Mount all server nodes first with per-NM keys
for nmclient in ${clients//,/ }; do
-# do_nodes $(comma_list $(all_server_nodes)) "lgss_sk -t server -l $SK_PATH/nodemap/c$i.key -n c$i"
- do_nodes $(comma_list $(all_server_nodes)) "lgss_sk -t server -l $SK_PATH/nodemap/c$i.key"
+ do_nodes $(comma_list $(all_server_nodes)) \
+ "$LGSS_SK -t server -l $SK_PATH/nodemap/c$i.key"
i=$((i + 1))
done
# set perms for per-nodemap keys else permission denied
fi
do_node $nmclient "! grep -q $mnt' ' \
/proc/mounts || umount $mnt"
- local prunedopts=$(add_sk_mntflag $prunedopts);
+ local prunedopts=$(add_sk_mntflag $opts);
prunedopts=$(echo $prunedopts | sed -e \
"s#skpath=[^ ^,]*#skpath=$mountkey#g")
set -x
do_nodes $clients "mount | grep $mnt' '"
set_default_debug_nodes $clients
+ set_params_clients $clients
return 0
}
zconf_umount_clients() {
- local clients=$1
- local mnt=$2
- local force
+ local clients=$1
+ local mnt=$2
+ local force
- [ "$3" ] && force=-f
+ [ "$3" ] && force=-f
- echo "Stopping clients: $clients $mnt (opts:$force)"
- do_nodes $clients "running=\\\$(grep -c $mnt' ' /proc/mounts);
+ echo "Stopping clients: $clients $mnt (opts:$force)"
+ do_nodes $clients "running=\\\$(grep -c $mnt' ' /proc/mounts);
if [ \\\$running -ne 0 ] ; then
echo Stopping client \\\$(hostname) $mnt opts:$force;
lsof $mnt || need_kill=no;
}
shutdown_node () {
- local node=$1
- echo + $POWER_DOWN $node
- $POWER_DOWN $node
+ local node=$1
+
+ echo + $POWER_DOWN $node
+ $POWER_DOWN $node
}
shutdown_node_hard () {
- local host=$1
- local attempts=$SHUTDOWN_ATTEMPTS
+ local host=$1
+ local attempts=$SHUTDOWN_ATTEMPTS
- for i in $(seq $attempts) ; do
- shutdown_node $host
- sleep 1
- wait_for_function --quiet "! ping -w 3 -c 1 $host" 5 1 && return 0
- echo "waiting for $host to fail attempts=$attempts"
- [ $i -lt $attempts ] || \
- { echo "$host still pingable after power down! attempts=$attempts" && return 1; }
- done
+ for i in $(seq $attempts) ; do
+ shutdown_node $host
+ sleep 1
+ wait_for_function --quiet "! ping -w 3 -c 1 $host" 5 1 &&
+ return 0
+ echo "waiting for $host to fail attempts=$attempts"
+ [ $i -lt $attempts ] ||
+ { echo "$host still pingable after power down! attempts=$attempts" && return 1; }
+ done
}
shutdown_client() {
- local client=$1
- local mnt=${2:-$MOUNT}
- local attempts=3
-
- if [ "$FAILURE_MODE" = HARD ]; then
- shutdown_node_hard $client
- else
- zconf_umount_clients $client $mnt -f
- fi
+ local client=$1
+ local mnt=${2:-$MOUNT}
+ local attempts=3
+
+ if [ "$FAILURE_MODE" = HARD ]; then
+ shutdown_node_hard $client
+ else
+ zconf_umount_clients $client $mnt -f
+ fi
}
facets_on_host () {
- local host=$1
- local facets="$(get_facets OST),$(get_facets MDS)"
- local affected
+ local affected
+ local host=$1
+ local facets="$(get_facets OST),$(get_facets MDS)"
- combined_mgs_mds || facets="$facets,mgs"
+ combined_mgs_mds || facets="$facets,mgs"
- for facet in ${facets//,/ }; do
- if [ $(facet_active_host $facet) == $host ]; then
- affected="$affected $facet"
- fi
- done
+ for facet in ${facets//,/ }; do
+ if [ $(facet_active_host $facet) == $host ]; then
+ affected="$affected $facet"
+ fi
+ done
- echo $(comma_list $affected)
+ echo $(comma_list $affected)
}
facet_up() {
}
facets_up_on_host () {
- local host=$1
- local facets=$(facets_on_host $host)
- local affected_up
+ local affected_up
+ local host=$1
+ local facets=$(facets_on_host $host)
- for facet in ${facets//,/ }; do
- if $(facet_up $facet $host); then
- affected_up="$affected_up $facet"
- fi
- done
+ for facet in ${facets//,/ }; do
+ if $(facet_up $facet $host); then
+ affected_up="$affected_up $facet"
+ fi
+ done
- echo $(comma_list $affected_up)
+ echo $(comma_list $affected_up)
}
shutdown_facet() {
}
reboot_node() {
- local node=$1
- echo + $POWER_UP $node
- $POWER_UP $node
+ local node=$1
+
+ echo + $POWER_UP $node
+ $POWER_UP $node
}
remount_facet() {
- local facet=$1
+ local facet=$1
- stop $facet
- mount_facet $facet
+ stop $facet
+ mount_facet $facet
}
reboot_facet() {
local facet=$1
+ local node=$(facet_active_host $facet)
+ local sleep_time=${2:-10}
+
if [ "$FAILURE_MODE" = HARD ]; then
- reboot_node $(facet_active_host $facet)
+ boot_node $node
else
- sleep 10
+ sleep $sleep_time
fi
}
boot_node() {
- local node=$1
- if [ "$FAILURE_MODE" = HARD ]; then
- reboot_node $node
- wait_for_host $node
- fi
+ local node=$1
+
+ if [ "$FAILURE_MODE" = HARD ]; then
+ reboot_node $node
+ wait_for_host $node
+ if $LOAD_MODULES_REMOTE; then
+ echo "loading modules on $node: $facet"
+ do_rpc_nodes $node load_modules_local
+ fi
+ fi
}
facets_hosts () {
- local facets=$1
- local hosts
+ local hosts
+ local facets=$1
- for facet in ${facets//,/ }; do
- hosts=$(expand_list $hosts $(facet_host $facet) )
- done
+ for facet in ${facets//,/ }; do
+ hosts=$(expand_list $hosts $(facet_host $facet) )
+ done
- echo $hosts
+ echo $hosts
}
_check_progs_installed () {
- local progs=$@
- local rc=0
+ local progs=$@
+ local rc=0
- for prog in $progs; do
- if ! [ "$(which $prog)" -o "${!prog}" ]; then
- echo $prog missing on $(hostname)
- rc=1
- fi
- done
- return $rc
+ for prog in $progs; do
+ if ! [ "$(which $prog)" -o "${!prog}" ]; then
+ echo $prog missing on $(hostname)
+ rc=1
+ fi
+ done
+ return $rc
}
check_progs_installed () {
local nodes=$1
shift
- do_rpc_nodes "$nodes" _check_progs_installed $@
+ do_rpc_nodes "$nodes" _check_progs_installed "$@"
}
# recovery-scale functions
return $RC
}
check_client_loads () {
- local clients=${1//,/ }
- local client=
- local rc=0
+ local clients=${1//,/ }
+ local client=
+ local rc=0
- for client in $clients; do
- check_client_load $client
- rc=${PIPESTATUS[0]}
- if [ "$rc" != 0 ]; then
- log "Client load failed on node $client, rc=$rc"
- return $rc
- fi
- done
+ for client in $clients; do
+ check_client_load $client
+ rc=${PIPESTATUS[0]}
+ if [ "$rc" != 0 ]; then
+ log "Client load failed on node $client, rc=$rc"
+ return $rc
+ fi
+ done
}
restart_client_loads () {
- local clients=${1//,/ }
- local expectedfail=${2:-""}
- local client=
- local rc=0
-
- for client in $clients; do
- check_client_load $client
- rc=${PIPESTATUS[0]}
- if [ "$rc" != 0 -a "$expectedfail" ]; then
- local var=$(node_var_name $client)_load
- start_client_load $client ${!var}
- echo "Restarted client load ${!var}: on $client. Checking ..."
- check_client_load $client
- rc=${PIPESTATUS[0]}
- if [ "$rc" != 0 ]; then
- log "Client load failed to restart on node $client, rc=$rc"
- # failure one client load means test fail
- # we do not need to check other
- return $rc
- fi
- else
- return $rc
- fi
- done
+ local clients=${1//,/ }
+ local expectedfail=${2:-""}
+ local client=
+ local rc=0
+
+ for client in $clients; do
+ check_client_load $client
+ rc=${PIPESTATUS[0]}
+ if [ "$rc" != 0 -a "$expectedfail" ]; then
+ local var=$(node_var_name $client)_load
+
+ start_client_load $client ${!var}
+ echo "Restarted client load ${!var}: on $client. Checking ..."
+ check_client_load $client
+ rc=${PIPESTATUS[0]}
+ if [ "$rc" != 0 ]; then
+ log "Client load failed to restart on node $client, rc=$rc"
+ # failure of one client load means the test fails;
+ # we do not need to check the other clients
+ return $rc
+ fi
+ else
+ return $rc
+ fi
+ done
}
# Start vmstat and save its process ID in a file.
start_vmstat() {
- local nodes=$1
- local pid_file=$2
+ local nodes=$1
+ local pid_file=$2
- [ -z "$nodes" -o -z "$pid_file" ] && return 0
+ [ -z "$nodes" -o -z "$pid_file" ] && return 0
- do_nodes $nodes \
+ do_nodes $nodes \
"vmstat 1 > $TESTLOG_PREFIX.$TESTNAME.vmstat.\\\$(hostname -s).log \
2>/dev/null </dev/null & echo \\\$! > $pid_file"
}
# Display the nodes on which client loads failed.
print_end_run_file() {
- local file=$1
- local node
+ local file=$1
+ local node
- [ -s $file ] || return 0
+ [ -s $file ] || return 0
- echo "Found the END_RUN_FILE file: $file"
- cat $file
+ echo "Found the END_RUN_FILE file: $file"
+ cat $file
- # A client load will stop if it finds the END_RUN_FILE file.
- # That does not mean the client load actually failed though.
- # The first node in END_RUN_FILE is the one we are interested in.
- read node < $file
+ # A client load will stop if it finds the END_RUN_FILE file.
+ # That does not mean the client load actually failed though.
+ # The first node in END_RUN_FILE is the one we are interested in.
+ read node < $file
- if [ -n "$node" ]; then
- local var=$(node_var_name $node)_load
+ if [ -n "$node" ]; then
+ local var=$(node_var_name $node)_load
- local prefix=$TESTLOG_PREFIX
- [ -n "$TESTNAME" ] && prefix=$prefix.$TESTNAME
- local stdout_log=$prefix.run_${!var}_stdout.$node.log
- local debug_log=$(echo $stdout_log | sed 's/\(.*\)stdout/\1debug/')
+ local prefix=$TESTLOG_PREFIX
+ [ -n "$TESTNAME" ] && prefix=$prefix.$TESTNAME
+ local stdout_log=$prefix.run_${!var}_stdout.$node.log
+ local debug_log=$(echo $stdout_log |
+ sed 's/\(.*\)stdout/\1debug/')
- echo "Client load ${!var} failed on node $node:"
- echo "$stdout_log"
- echo "$debug_log"
- fi
+ echo "Client load ${!var} failed on node $node:"
+ echo "$stdout_log"
+ echo "$debug_log"
+ fi
}
# Stop the process which had its PID saved in a file.
stop_process() {
- local nodes=$1
- local pid_file=$2
+ local nodes=$1
+ local pid_file=$2
- [ -z "$nodes" -o -z "$pid_file" ] && return 0
+ [ -z "$nodes" -o -z "$pid_file" ] && return 0
- do_nodes $nodes "test -f $pid_file &&
- { kill -s TERM \\\$(cat $pid_file); rm -f $pid_file; }" || true
+ do_nodes $nodes "test -f $pid_file &&
+ { kill -s TERM \\\$(cat $pid_file); rm -f $pid_file; }" || true
}
# Stop all client loads.
stop_client_loads() {
- local nodes=${1:-$CLIENTS}
- local pid_file=$2
+ local nodes=${1:-$CLIENTS}
+ local pid_file=$2
- # stop the client loads
- stop_process $nodes $pid_file
+ # stop the client loads
+ stop_process $nodes $pid_file
- # clean up the processes that started them
- [ -n "$CLIENT_LOAD_PIDS" ] && kill -9 $CLIENT_LOAD_PIDS 2>/dev/null || true
+ # clean up the processes that started them
+ [ -n "$CLIENT_LOAD_PIDS" ] &&
+ kill -9 $CLIENT_LOAD_PIDS 2>/dev/null || true
}
# End recovery-scale functions
-# verify that lustre actually cleaned up properly
-cleanup_check() {
- VAR=$(lctl get_param -n catastrophe 2>&1)
- if [ $? = 0 ] ; then
- if [ $VAR != 0 ]; then
- error "LBUG/LASSERT detected"
- fi
- fi
- BUSY=$(dmesg | grep -i destruct || true)
- if [ -n "$BUSY" ]; then
- echo "$BUSY" 1>&2
- [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.$(date +%s)
- exit 205
- fi
-
- check_mem_leak || exit 204
-
- [[ $($LCTL dl 2>/dev/null | wc -l) -gt 0 ]] && $LCTL dl &&
- echo "$TESTSUITE: lustre didn't clean up..." 1>&2 &&
- return 202 || true
-
- if module_loaded lnet || module_loaded libcfs; then
- echo "$TESTSUITE: modules still loaded..." 1>&2
- /sbin/lsmod 1>&2
- return 203
- fi
- return 0
-}
+##
+# wait for a command to return the expected result
+#
+# This will run @check on @node repeatedly until the output matches @expect
+# based on the supplied condition, or until @max_wait seconds have elapsed,
+# whichever comes first. @cond may be one of the normal bash operators,
+# "-gt", "-ge", "-eq", "-le", "-lt", "==", "!=", or "=~", and must be quoted
+# in the caller to avoid unintentional evaluation by the shell in the caller.
+#
+# If @max_wait is not specified, the condition will be checked for up to 90s.
+#
+# If --verbose is passed as the first argument, the result is printed on each
+# value change, otherwise it is only printed after every 10s interval.
+#
+# If --quiet is passed as the first/second argument, the do_node() command
+# will not print the remote command before executing it each time.
+#
+# Using wait_update_cond() or related helper function is preferable to adding
+# a "long enough" wait for some state to change in the background, since
+# "long enough" may be too short due to tunables, system config, or running in
+# a VM, and must by necessity wait too long for most cases or risk failure.
+#
+# usage: wait_update_cond [--verbose] [--quiet] node check cond expect [max_wait]
+wait_update_cond() {
+ local verbose
+ local quiet
-wait_update () {
- local verbose=false
- if [[ "$1" == "--verbose" ]]; then
- shift
- verbose=true
- fi
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
local node=$1
- local TEST=$2
- local FINAL=$3
- local MAX=${4:-90}
- local RESULT
- local PREV_RESULT
- local WAIT=0
+ local check="$2"
+ local cond="$3"
+ local expect="$4"
+ local max_wait=${5:-90}
+ local result
+ local prev_result
+ local waited=0
+ local begin=$SECONDS
local sleep=1
local print=10
- PREV_RESULT=$(do_node $node "$TEST")
- while [ true ]; do
- RESULT=$(do_node $node "$TEST")
- if [[ "$RESULT" == "$FINAL" ]]; then
- [[ -z "$RESULT" || $WAIT -le $sleep ]] ||
- echo "Updated after ${WAIT}s: wanted '$FINAL'"\
- "got '$RESULT'"
+ while (( $waited <= $max_wait )); do
+ result=$(do_node $quiet $node "$check")
+
+ eval [[ "'$result'" $cond "'$expect'" ]]
+ if [[ $? == 0 ]]; then
+ [[ -n "$quiet" ]] && return 0
+ [[ -z "$result" || $waited -le $sleep ]] ||
+ echo "Updated after ${waited}s: want '$expect' got '$result'"
return 0
fi
- if [[ $verbose && "$RESULT" != "$PREV_RESULT" ]]; then
- echo "Changed after ${WAIT}s: from '$PREV_RESULT'"\
- "to '$RESULT'"
- PREV_RESULT=$RESULT
+ if [[ -n "$verbose" && "$result" != "$prev_result" ]]; then
+ [[ -z "$quiet" && -n "$prev_result" ]] &&
+ echo "Changed after ${waited}s: from '$prev_result' to '$result'"
+ prev_result="$result"
fi
- [[ $WAIT -ge $MAX ]] && break
- [[ $((WAIT % print)) -eq 0 ]] &&
- echo "Waiting $((MAX - WAIT)) secs for update"
- WAIT=$((WAIT + sleep))
+ (( $waited % $print == 0 )) && {
+ [[ -z "$quiet" ]] &&
+ echo "Waiting $((max_wait - waited))s for '$expect'"
+ }
+
sleep $sleep
+ waited=$((SECONDS - begin))
done
- echo "Update not seen after ${MAX}s: wanted '$FINAL' got '$RESULT'"
+
+ [[ -z "$quiet" ]] &&
+ echo "Update not seen after ${max_wait}s: want '$expect' got '$result'"
+
return 3
}
+# usage: wait_update [--verbose] [--quiet] node check expect [max_wait]
+wait_update() {
+ local verbose
+ local quiet
+
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
+ local node="$1"
+ local check="$2"
+ local expect="$3"
+ local max_wait=$4
+
+ wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait
+}
+
+# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait]
+wait_update_facet_cond() {
+ local verbose
+ local quiet
+
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
+ local node=$(facet_active_host $1)
+ local check="$2"
+ local cond="$3"
+ local expect="$4"
+ local max_wait=$5
+
+ wait_update_cond $verbose $quiet $node "$check" "$cond" "$expect" $max_wait
+}
+
+# usage: wait_update_facet [--verbose] facet check expect [max_wait]
wait_update_facet() {
- local verbose=
- [ "$1" = "--verbose" ] && verbose="$1" && shift
+ local verbose
+ local quiet
- local facet=$1
- shift
- wait_update $verbose $(facet_active_host $facet) "$@"
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
+ local node=$(facet_active_host $1)
+ local check="$2"
+ local expect="$3"
+ local max_wait=$4
+
+ wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait
}
sync_all_data() {
}
wait_for_facet() {
- local facetlist=$1
- local hostlist
+ local facetlist=$1
+ local hostlist
- for facet in ${facetlist//,/ }; do
- hostlist=$(expand_list $hostlist $(facet_active_host $facet))
- done
- wait_for_host $hostlist
+ for facet in ${facetlist//,/ }; do
+ hostlist=$(expand_list $hostlist $(facet_active_host $facet))
+ done
+ wait_for_host $hostlist
}
_wait_recovery_complete () {
- local param=$1
+ local param=$1
- # Use default policy if $2 is not passed by caller.
- local MAX=${2:-$(max_recovery_time)}
+ # Use default policy if $2 is not passed by caller.
+ local MAX=${2:-$(max_recovery_time)}
- local WAIT=0
- local STATUS=
+ local WAIT=0
+ local STATUS=
- while [ $WAIT -lt $MAX ]; do
- STATUS=$(lctl get_param -n $param | grep status)
- echo $param $STATUS
- [[ $STATUS = "status: COMPLETE" || $STATUS = "status: INACTIVE" ]] && return 0
- sleep 5
- WAIT=$((WAIT + 5))
- echo "Waiting $((MAX - WAIT)) secs for $param recovery done. $STATUS"
- done
- echo "$param recovery not done in $MAX sec. $STATUS"
- return 1
+ while [ $WAIT -lt $MAX ]; do
+ STATUS=$(lctl get_param -n $param | grep status)
+ echo $param $STATUS
+ [[ $STATUS == "status: COMPLETE" ||
+ $STATUS == "status: INACTIVE" ]] && return 0
+ sleep 5
+ WAIT=$((WAIT + 5))
+ echo "Waiting $((MAX - WAIT)) secs for $param recovery done. $STATUS"
+ done
+ echo "$param recovery not done in $MAX sec. $STATUS"
+ return 1
}
wait_recovery_complete () {
- local facet=$1
+ local facet=$1
- # with an assumption that at_max is the same on all nodes
- local MAX=${2:-$(max_recovery_time)}
+ # with an assumption that at_max is the same on all nodes
+ local MAX=${2:-$(max_recovery_time)}
- local facets=$facet
- if [ "$FAILURE_MODE" = HARD ]; then
- facets=$(facets_on_host $(facet_active_host $facet))
- fi
- echo affected facets: $facets
+ local facets=$facet
+ if [ "$FAILURE_MODE" = HARD ]; then
+ facets=$(facets_on_host $(facet_active_host $facet))
+ fi
+ echo affected facets: $facets
- # we can use "for" here because we are waiting the slowest
- for facet in ${facets//,/ }; do
+ facets=${facets//,/ }
+ # We can use "for" here because we are waiting for the slowest.
+ # The MGS does not have the recovery_status proc entry, so exclude
+ # it from the facet list.
+ for facet in ${facets//mgs/ }; do
local var_svc=${facet}_svc
local param="*.${!var_svc}.recovery_status"
}
wait_destroy_complete () {
- echo "Waiting for local destroys to complete"
+ echo "Waiting for MDT destroys to complete"
# MAX value shouldn't be big as this mean server responsiveness
# never increase this just to make test pass but investigate
# why it takes so long time
- local MAX=5
+ local MAX=${1:-5}
local WAIT=0
+ local list=$(comma_list $(mdts_nodes))
while [ $WAIT -lt $MAX ]; do
- local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight))
+ local -a RPCs=($(do_nodes $list $LCTL get_param -n osp.*.destroys_in_flight))
local con=1
local i
con=0
break;
done
- sleep 1
[ ${con} -eq 1 ] && return 0 # done waiting
+ sleep 1
echo "Waiting ${WAIT}s for local destroys to complete"
WAIT=$((WAIT + 1))
done
- echo "Local destroys weren't done in $MAX sec."
+ echo "MDT destroys weren't done in $MAX sec."
return 1
}
}
wait_exit_ST () {
- local facet=$1
-
- local WAIT=0
- local INTERVAL=1
- local running
- # conf-sanity 31 takes a long time cleanup
- while [ $WAIT -lt 300 ]; do
- running=$(do_facet ${facet} "lsmod | grep lnet > /dev/null &&
+ local facet=$1
+
+ local WAIT=0
+ local INTERVAL=1
+ local running
+ # conf-sanity 31 takes a long time to clean up
+ while [ $WAIT -lt 300 ]; do
+ running=$(do_facet ${facet} "lsmod | grep lnet > /dev/null &&
lctl dl | grep ' ST ' || true")
- [ -z "${running}" ] && return 0
- echo "waited $WAIT for${running}"
- [ $INTERVAL -lt 64 ] && INTERVAL=$((INTERVAL + INTERVAL))
- sleep $INTERVAL
- WAIT=$((WAIT + INTERVAL))
- done
- echo "service didn't stop after $WAIT seconds. Still running:"
- echo ${running}
- return 1
+ [ -z "${running}" ] && return 0
+ echo "waited $WAIT for${running}"
+ [ $INTERVAL -lt 64 ] && INTERVAL=$((INTERVAL + INTERVAL))
+ sleep $INTERVAL
+ WAIT=$((WAIT + INTERVAL))
+ done
+ echo "service didn't stop after $WAIT seconds. Still running:"
+ echo ${running}
+ return 1
}
wait_remote_prog () {
- local prog=$1
- local WAIT=0
- local INTERVAL=5
- local rc=0
-
- [ "$PDSH" = "no_dsh" ] && return 0
-
- while [ $WAIT -lt $2 ]; do
- running=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep) || true
- [ -z "${running}" ] && return 0 || true
- echo "waited $WAIT for: "
- echo "$running"
- [ $INTERVAL -lt 60 ] && INTERVAL=$((INTERVAL + INTERVAL))
- sleep $INTERVAL
- WAIT=$((WAIT + INTERVAL))
- done
- local pids=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep | awk '{print $2}')
- [ -z "$pids" ] && return 0
- echo "$PDSH processes still exists after $WAIT seconds. Still running: $pids"
- # FIXME: not portable
- for pid in $pids; do
- cat /proc/${pid}/status || true
- cat /proc/${pid}/wchan || true
- echo "Killing $pid"
- kill -9 $pid || true
- sleep 1
- ps -P $pid && rc=1
- done
-
- return $rc
+ local prog=$1
+ local WAIT=0
+ local INTERVAL=5
+ local rc=0
+
+ [ "$PDSH" = "no_dsh" ] && return 0
+
+ while [ $WAIT -lt $2 ]; do
+ running=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" |
+ grep -v grep) || true
+ [ -z "${running}" ] && return 0 || true
+ echo "waited $WAIT for: "
+ echo "$running"
+ [ $INTERVAL -lt 60 ] && INTERVAL=$((INTERVAL + INTERVAL))
+ sleep $INTERVAL
+ WAIT=$((WAIT + INTERVAL))
+ done
+ local pids=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" |
+ grep -v grep | awk '{print $2}')
+ [ -z "$pids" ] && return 0
+ echo "$PDSH processes still exists after $WAIT seconds. Still running: $pids"
+ # FIXME: not portable
+ for pid in $pids; do
+ cat /proc/${pid}/status || true
+ cat /proc/${pid}/wchan || true
+ echo "Killing $pid"
+ kill -9 $pid || true
+ sleep 1
+ ps -P $pid && rc=1
+ done
+
+ return $rc
}
-lfs_df_check() {
+_lfs_df_check() {
local clients=${1:-$CLIENTS}
+ local rc=0
- if [ -z "$clients" ]; then
- $LFS df $MOUNT
+ if [[ -z "$clients" ]]; then
+ $LFS df $MOUNT > /dev/null || rc=$?
else
- $PDSH $clients "$LFS df $MOUNT" > /dev/null
+ $PDSH $clients "$LFS df $MOUNT" > /dev/null || rc=$?
fi
+
+ return $rc
+}
+
+lfs_df_check() {
+ local clients=${1:-$CLIENTS}
+ local rc=0
+
+ _lfs_df_check "$clients" || rc=$?
+
+ check_lfs_df_ret_val $rc
}
clients_up() {
lfs_df_check
}
+all_mds_up() {
+ (( MDSCOUNT == 1 )) && return
+
+ # wait so that the statfs data cached on the MDTs expires
+ local delay=$(do_facet mds1 $LCTL \
+ get_param -n osp.*MDT*MDT0000.maxage | sort -n | tail -1)
+
+ [ -n "$delay" ] || error "fail to get maxage"
+ sleep $delay
+ local nodes=$(comma_list $(mdts_nodes))
+ # initiate statfs RPC, all to all MDTs
+ do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null
+ do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null
+}
+
client_up() {
# usually checked on particular client or locally
sleep 1
}
client_evicted() {
- ! client_up $1
+ sleep 1
+ ! _lfs_df_check $1
}
client_reconnect_try() {
}
affected_facets () {
- local facet=$1
+ local facet=$1
- local host=$(facet_active_host $facet)
- local affected=$facet
+ local host=$(facet_active_host $facet)
+ local affected=$facet
- if [ "$FAILURE_MODE" = HARD ]; then
- affected=$(facets_up_on_host $host)
- fi
- echo $affected
+ if [ "$FAILURE_MODE" = HARD ]; then
+ affected=$(facets_up_on_host $host)
+ fi
+ echo $affected
}
facet_failover() {
skip=0
#check whether facet has been included in other affected facets
for ((index=0; index<$total; index++)); do
- [[ *,$facet,* == ,${affecteds[index]}, ]] && skip=1
+ [[ ,${affecteds[index]}, == *,$facet,* ]] && skip=1
done
if [ $skip -eq 0 ]; then
shutdown_facet $facet
done
- $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \
- $(mdsdevname 1) "-n" || error "Running e2fsck")
+ echo "$(date +'%H:%M:%S (%s)') shut down"
- for ((index=0; index<$total; index++)); do
- facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1)
- echo reboot facets: ${affecteds[index]}
+ local hostlist
+ local waithostlist
+
+ for facet in ${facets//,/ }; do
+ local host=$(facet_active_host $facet)
+
+ hostlist=$(expand_list $hostlist $host)
+ if [ $(facet_host $facet) = \
+ $(facet_failover_host $facet) ]; then
+ waithostlist=$(expand_list $waithostlist $host)
+ fi
+ done
+
+ if [ "$FAILURE_MODE" = HARD ]; then
+ for host in ${hostlist//,/ }; do
+ reboot_node $host
+ done
+ echo "$(date +'%H:%M:%S (%s)') $hostlist rebooted"
+ # We need to wait for the rebooted hosts in case
+ # facet_HOST == facetfailover_HOST
+ if ! [ -z "$waithostlist" ]; then
+ wait_for_host $waithostlist
+ if $LOAD_MODULES_REMOTE; then
+ echo "loading modules on $waithostlist"
+ do_rpc_nodes $waithostlist load_modules_local
+ fi
+ fi
+ else
+ sleep 10
+ fi
+
+ if [[ " ${affecteds[@]} " =~ " $SINGLEMDS " ]]; then
+ change_active $SINGLEMDS
+ fi
- reboot_facet $facet
+ $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \
+ $(facet_device $SINGLEMDS) "-n" || error "Running e2fsck")
- change_active ${affecteds[index]}
+ local -a mountpids
- wait_for_facet ${affecteds[index]}
+ for ((index=0; index<$total; index++)); do
+ if [[ ${affecteds[index]} != $SINGLEMDS ]]; then
+ change_active ${affecteds[index]}
+ fi
if $GSS_SK; then
init_gss
init_facets_vars_simple
if ! combined_mgs_mds &&
list_member ${affecteds[index]} mgs; then
mount_facet mgs || error "Restart of mgs failed"
+ affecteds[index]=$(exclude_items_from_list \
+ ${affecteds[index]} mgs)
+ fi
+ if [ -n "${affecteds[index]}" ]; then
+ echo mount facets: ${affecteds[index]}
+ mount_facets ${affecteds[index]} &
+ mountpids[index]=$!
fi
- # FIXME; has to be changed to mount all facets concurrently
- affected=$(exclude_items_from_list ${affecteds[index]} mgs)
- echo mount facets: ${affecteds[index]}
- mount_facets ${affecteds[index]}
+ done
+ for ((index=0; index<$total; index++)); do
+ if [ -n "${affecteds[index]}" ]; then
+ wait ${mountpids[index]}
+ fi
+
if $GSS_SK; then
do_nodes $(comma_list $(all_nodes)) \
"keyctl show | grep lustre | cut -c1-11 |
xargs -IX keyctl setperm X 0x3f3f3f3f"
fi
done
-}
+ echo "$(date +'%H:%M:%S (%s)') targets are mounted"
+
+ if [ "$FAILURE_MODE" = HARD ]; then
+ hostlist=$(exclude_items_from_list $hostlist $waithostlist)
+ if ! [ -z "$hostlist" ]; then
+ wait_for_host $hostlist
+ if $LOAD_MODULES_REMOTE; then
+ echo "loading modules on $hostlist"
+ do_rpc_nodes $hostlist load_modules_local
+ fi
+ fi
+ fi
-obd_name() {
- local facet=$1
+ echo "$(date +'%H:%M:%S (%s)') facet_failover done"
}
replay_barrier() {
export SK_NO_KEY=$SK_NO_KEY_save
# to initiate all OSC idling connections
clients_up
- wait_clients_import_state "$clients" "$facets" "\(FULL\|IDLE\)"
+ wait_clients_import_ready "$clients" "$facets"
clients_up || error "post-failover stat: $?"
}
fail_nodf() {
- local facet=$1
- facet_failover $facet
+ local facet=$1
+
+ facet_failover $facet
}
fail_abort() {
local facet=$1
+ local abort_type=${2:-"abort_recovery"}
+
stop $facet
change_active $facet
wait_for_facet $facet
- mount_facet $facet -o abort_recovery
+ mount_facet $facet -o $abort_type
clients_up || echo "first stat failed: $?"
clients_up || error "post-failover stat: $?"
+ all_mds_up
+}
+
+# LU-16159: abort recovery will cancel update logs, which may leave broken
+# directories in the system; remove the name entry if necessary
+fail_abort_cleanup() {
+ rm -rf $DIR/$tdir/*
+ find $DIR/$tdir -depth | while read D; do
+ rmdir "$D" || $LFS rm_entry "$D" || error "rm $D failed"
+ done
}
host_nids_address() {
}
declare -fx h2nettype
-# Wrapper function to print the deprecation warning
-h2tcp() {
- echo "h2tcp: deprecated, use h2nettype instead" 1>&2
- if [[ -n "$NETTYPE" ]]; then
- h2nettype "$@"
- else
- h2nettype "$1" "tcp"
- fi
-}
-
-# Wrapper function to print the deprecation warning
-h2o2ib() {
- echo "h2o2ib: deprecated, use h2nettype instead" 1>&2
- if [[ -n "$NETTYPE" ]]; then
- h2nettype "$@"
- else
- h2nettype "$1" "o2ib"
- fi
-}
-
# This enables variables in cfg/"setup".sh files to support the pdsh HOSTLIST
# expressions format. As a bonus we can then just pass in those variables
# to pdsh. What this function does is take a HOSTLIST type string and
# expand it into a space deliminated list for us.
hostlist_expand() {
- local hostlist=$1
- local offset=$2
- local myList
- local item
- local list
-
- [ -z "$hostlist" ] && return
-
- # Translate the case of [..],..,[..] to [..] .. [..]
- list="${hostlist/],/] }"
- front=${list%%[*}
- [[ "$front" == *,* ]] && {
- new="${list%,*} "
- old="${list%,*},"
- list=${list/${old}/${new}}
- }
-
- for item in $list; do
- # Test if we have any []'s at all
- if [ "$item" != "${item/\[/}" ]; then {
- # Expand the [*] into list
- name=${item%%[*}
- back=${item#*]}
-
- if [ "$name" != "$item" ]; then
- group=${item#$name[*}
- group=${group%%]*}
-
- for range in ${group//,/ }; do
- local order
-
- begin=${range%-*}
- end=${range#*-}
-
- # Number of leading zeros
- padlen=${#begin}
- padlen2=${#end}
- end=$(echo $end | sed 's/0*//')
- [[ -z "$end" ]] && end=0
- [[ $padlen2 -gt $padlen ]] && {
- [[ $padlen2 -eq ${#end} ]] && padlen2=0
- padlen=$padlen2
- }
- begin=$(echo $begin | sed 's/0*//')
- [ -z $begin ] && begin=0
-
- if [ ! -z "${begin##[!0-9]*}" ]; then
- order=$(seq -f "%0${padlen}g" $begin $end)
- else
- order=$(eval echo {$begin..$end});
- fi
-
- for num in $order; do
- value="${name#*,}${num}${back}"
- [ "$value" != "${value/\[/}" ] && {
- value=$(hostlist_expand "$value")
- }
- myList="$myList $value"
- done
- done
- fi
- } else {
- myList="$myList $item"
- } fi
- done
- myList=${myList//,/ }
- myList=${myList:1} # Remove first character which is a space
-
- # Filter any duplicates without sorting
- list="$myList "
- myList="${list%% *}"
-
- while [[ "$list" != ${myList##* } ]]; do
- local tlist=" $list"
- list=${tlist// ${list%% *} / }
- list=${list:1}
- myList="$myList ${list%% *}"
- done
- myList="${myList%* }";
-
- # We can select an object at an offset in the list
- [ $# -eq 2 ] && {
- cnt=0
- for item in $myList; do
- let cnt=cnt+1
- [ $cnt -eq $offset ] && {
- myList=$item
- }
- done
- [ $(get_node_count $myList) -ne 1 ] && myList=""
- }
- echo $myList
+ local hostlist=$1
+ local offset=$2
+ local myList
+ local item
+ local list
+
+ [ -z "$hostlist" ] && return
+
+ # Translate the case of [..],..,[..] to [..] .. [..]
+ list="${hostlist/],/] }"
+ front=${list%%[*}
+ [[ "$front" == *,* ]] && {
+ new="${list%,*} "
+ old="${list%,*},"
+ list=${list/${old}/${new}}
+ }
+
+ for item in $list; do
+ # Test if we have any []'s at all
+ if [ "$item" != "${item/\[/}" ]; then {
+ # Expand the [*] into list
+ name=${item%%[*}
+ back=${item#*]}
+
+ if [ "$name" != "$item" ]; then
+ group=${item#$name[*}
+ group=${group%%]*}
+
+ for range in ${group//,/ }; do
+ local order
+
+ begin=${range%-*}
+ end=${range#*-}
+
+ # Number of leading zeros
+ padlen=${#begin}
+ padlen2=${#end}
+ end=$(echo $end | sed 's/0*//')
+ [[ -z "$end" ]] && end=0
+ [[ $padlen2 -gt $padlen ]] && {
+ [[ $padlen2 -eq ${#end} ]] &&
+ padlen2=0
+ padlen=$padlen2
+ }
+ begin=$(echo $begin | sed 's/0*//')
+ [ -z $begin ] && begin=0
+
+ if [ ! -z "${begin##[!0-9]*}" ]; then
+ order=$(seq -f "%0${padlen}g" $begin $end)
+ else
+ order=$(eval echo {$begin..$end});
+ fi
+
+ for num in $order; do
+ value="${name#*,}${num}${back}"
+
+ [ "$value" != "${value/\[/}" ] && {
+ value=$(hostlist_expand "$value")
+ }
+ myList="$myList $value"
+ done
+ done
+ fi
+ } else {
+ myList="$myList $item"
+ } fi
+ done
+ myList=${myList//,/ }
+ myList=${myList:1} # Remove first character which is a space
+
+ # Filter any duplicates without sorting
+ list="$myList "
+ myList="${list%% *}"
+
+ while [[ "$list" != ${myList##* } ]]; do
+ local tlist=" $list"
+
+ list=${tlist// ${list%% *} / }
+ list=${list:1}
+ myList="$myList ${list%% *}"
+ done
+ myList="${myList%* }";
+
+ # We can select an object at an offset in the list
+ [ $# -eq 2 ] && {
+ cnt=0
+ for item in $myList; do
+ let cnt=cnt+1
+ [ $cnt -eq $offset ] && {
+ myList=$item
+ }
+ done
+ [ $(get_node_count $myList) -ne 1 ] && myList=""
+ }
+ echo $myList
}
facet_host() {
elif [ "${facet:0:3}" == "mdt" -o \
"${facet:0:3}" == "mds" -o \
"${facet:0:3}" == "mgs" ]; then
- eval export ${facet}_HOST=${mds_HOST}
+ local temp
+ if [ "${facet}" == "mgsfailover" ] &&
+ [ -n "$mds1failover_HOST" ]; then
+ temp=$mds1failover_HOST
+ else
+ temp=${mds_HOST}
+ fi
+ eval export ${facet}_HOST=$temp
fi
fi
echo -n ${!varname}
return
fi
+ if combined_mgs_mds && [ $facet == "mgs" ] &&
+ [ -n "$mds1failover_HOST" ]; then
+ echo $mds1failover_HOST
+ return
+ fi
+
if [ "${facet:0:3}" == "mdt" -o "${facet:0:3}" == "mds" -o \
"${facet:0:3}" == "mgs" ]; then
}
facet_active() {
- local facet=$1
- local activevar=${facet}active
+ local facet=$1
+ local activevar=${facet}active
- if [ -f $TMP/${facet}active ] ; then
- source $TMP/${facet}active
- fi
+ if [ -f $TMP/${facet}active ] ; then
+ source $TMP/${facet}active
+ fi
- active=${!activevar}
- if [ -z "$active" ] ; then
- echo -n ${facet}
- else
- echo -n ${active}
- fi
+ active=${!activevar}
+ if [ -z "$active" ] ; then
+ echo -n ${facet}
+ else
+ echo -n ${active}
+ fi
}
facet_active_host() {
}
change_active() {
- local facetlist=$1
- local facet
+ local facetlist=$1
+ local facet
- facetlist=$(exclude_items_from_list $facetlist mgs)
+ for facet in ${facetlist//,/ }; do
+ local failover=${facet}failover
+ local host=`facet_host $failover`
- for facet in ${facetlist//,/ }; do
- local failover=${facet}failover
- local host=`facet_host $failover`
- [ -z "$host" ] && return
+ [ -z "$host" ] && return
- local curactive=`facet_active $facet`
- if [ -z "${curactive}" -o "$curactive" == "$failover" ] ; then
- eval export ${facet}active=$facet
- else
- eval export ${facet}active=$failover
- fi
- # save the active host for this facet
- local activevar=${facet}active
- echo "$activevar=${!activevar}" > $TMP/$activevar
- [[ $facet = mds1 ]] && combined_mgs_mds && \
- echo "mgsactive=${!activevar}" > $TMP/mgsactive
- local TO=`facet_active_host $facet`
- echo "Failover $facet to $TO"
- done
+ local curactive=`facet_active $facet`
+
+ if [ -z "${curactive}" -o "$curactive" == "$failover" ] ; then
+ eval export ${facet}active=$facet
+ else
+ eval export ${facet}active=$failover
+ fi
+ # save the active host for this facet
+ local activevar=${facet}active
+
+ echo "$activevar=${!activevar}" > $TMP/$activevar
+ [[ $facet = mds1 ]] && combined_mgs_mds && \
+ echo "mgsactive=${!activevar}" > $TMP/mgsactive
+ local TO=`facet_active_host $facet`
+ echo "Failover $facet to $TO"
+ done
}
do_node() {
- local verbose=false
- # do not stripe off hostname if verbose, bug 19215
- if [ x$1 = x--verbose ]; then
- shift
- verbose=true
- fi
+ local verbose
+ local quiet
- local HOST=$1
- shift
- local myPDSH=$PDSH
- if [ "$HOST" = "$HOSTNAME" ]; then
- myPDSH="no_dsh"
- elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then
- echo "cannot run remote command on $HOST with $myPDSH"
- return 128
- fi
- if $VERBOSE; then
- echo "CMD: $HOST $@" >&2
- $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
- fi
+ # do not strip off hostname if verbose, b=19215
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
+ local HOST=$1
+ shift
+ local myPDSH=$PDSH
+
+ if [ "$HOST" = "$HOSTNAME" ]; then
+ myPDSH="no_dsh"
+ elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then
+ echo "cannot run remote command on $HOST with $myPDSH"
+ return 128
+ fi
+ if $VERBOSE && [[ -z "$quiet" ]]; then
+ echo "CMD: $HOST $*" >&2
+ $myPDSH $HOST "$LCTL mark \"$*\"" > /dev/null 2>&1 || :
+ fi
if [[ "$myPDSH" == "rsh" ]] ||
[[ "$myPDSH" == *pdsh* && "$myPDSH" != *-S* ]]; then
eval $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests;
PATH=\$PATH:/sbin:/usr/sbin;
cd $RPWD;
- LUSTRE=\"$RLUSTRE\" sh -c \"$@\") ||
+ LUSTRE=\"$RLUSTRE\" bash -c \"$*\") ||
echo command failed >$command_status"
[[ -n "$($myPDSH $HOST cat $command_status)" ]] && return 1 ||
return 0
fi
- if $verbose ; then
- # print HOSTNAME for myPDSH="no_dsh"
- if [[ $myPDSH = no_dsh ]]; then
- $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed -e "s/^/${HOSTNAME}: /"
- else
- $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")"
- fi
- else
- $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed "s/^${HOST}: //"
- fi
- return ${PIPESTATUS[0]}
+ if [[ -n "$verbose" ]]; then
+ # print HOSTNAME for myPDSH="no_dsh"
+ if [[ $myPDSH = no_dsh ]]; then
+ $myPDSH $HOST \
+ "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\
+ cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$*\")" |
+ sed -e "s/^/${HOSTNAME}: /"
+ else
+ $myPDSH $HOST \
+ "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\
+ cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$*\")"
+ fi
+ else
+ $myPDSH $HOST \
+ "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\
+ cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$*\")" |
+ sed "s/^${HOST}: //"
+ fi
+ return ${PIPESTATUS[0]}
}
-do_nodev() {
- do_node --verbose "$@"
+##
+# Execute exact command line on host
+#
+# The \a host may be on a local or remote node, which is determined at
+# the time the command is run. Does careful argument quotation to
+# ensure that the exact command line is executed without any globbing,
+# substitution, or shell interpretation on the remote side. Does not
+# support --verbose or --quiet. Does not include "$host: " prefixes on
+# output. See also do_facet_vp().
+#
+# usage: do_node_vp "$host" "$command" "$arg"...
+do_node_vp() {
+ local host="$1"
+ shift
+
+ if [[ "$host" == "$HOSTNAME" ]]; then
+ bash -c "$(printf -- ' %q' "$@")"
+ return $?
+ fi
+
+ if [[ "${PDSH}" != *pdsh* || "${PDSH}" != *-S* ]]; then
+ echo "cannot run '$*' on host '${host}' with PDSH='${PDSH}'" >&2
+ return 128
+ fi
+
+ # -N Disable hostname: prefix on lines of output.
+
+ $PDSH "${host}" -N "cd $RPWD; PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; export LUSTRE=$RLUSTRE; $(printf -- ' %q' "$@")"
}
single_local_node () {
- [ "$1" = "$HOSTNAME" ]
+ [ "$1" = "$HOSTNAME" ]
}
# Outputs environment variable assignments that should be passed to remote nodes
}
do_nodes() {
- local verbose=false
- # do not stripe off hostname if verbose, bug 19215
- if [ x$1 = x--verbose ]; then
- shift
- verbose=true
- fi
+ local verbose
+ local quiet
- local rnodes=$1
- shift
+ # do not strip off hostname if verbose, b=19215
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
- if single_local_node $rnodes; then
- if $verbose; then
- do_nodev $rnodes "$@"
- else
- do_node $rnodes "$@"
- fi
- return $?
- fi
+ local rnodes=$1
+ shift
- # This is part from do_node
- local myPDSH=$PDSH
+ if single_local_node $rnodes; then
+ do_node $verbose $quiet $rnodes "$@"
+ return $?
+ fi
- [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && \
- echo "cannot run remote command on $rnodes with $myPDSH" && return 128
+ # This is part from do_node
+ local myPDSH=$PDSH
- export FANOUT=$(get_node_count "${rnodes//,/ }")
- if $VERBOSE; then
- echo "CMD: $rnodes $@" >&2
- $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
- fi
+ [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] &&
+ echo "cannot run remote command on $rnodes with $myPDSH" &&
+ return 128
- # do not replace anything from pdsh output if -N is used
- # -N Disable hostname: prefix on lines of output.
- if $verbose || [[ $myPDSH = *-N* ]]; then
- $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")"
- else
- $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" | sed -re "s/^[^:]*: //g"
- fi
- return ${PIPESTATUS[0]}
+ export FANOUT=$(get_node_count "${rnodes//,/ }")
+ if $VERBOSE && [[ -z "$quiet" ]]; then
+ echo "CMD: $rnodes $*" >&2
+ $myPDSH $rnodes "$LCTL mark \"$*\"" > /dev/null 2>&1 || :
+ fi
+
+ # do not replace anything from pdsh output if -N is used
+ # -N Disable hostname: prefix on lines of output.
+ if [[ -n "$verbose" || $myPDSH = *-N* ]]; then
+ $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$*\")"
+ else
+ $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$*\")" | sed -re "s/^[^:]*: //g"
+ fi
+ return ${PIPESTATUS[0]}
}
##
#
# usage: do_facet $facet command [arg ...]
do_facet() {
+ local verbose
+ local quiet
+
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
local facet=$1
shift
- local HOST=$(facet_active_host $facet)
- [ -z $HOST ] && echo "No host defined for facet ${facet}" && exit 1
- do_node $HOST "$@"
+ local host=$(facet_active_host $facet)
+
+ [ -z "$host" ] && echo "No host defined for facet ${facet}" && exit 1
+ do_node $verbose $quiet $host "$@"
+}
+
+##
+# Execute exact command line on the host of a facet
+#
+# The \a facet (service) may be on a local or remote node, which is
+# determined at the time the command is run. Does careful argument
+# quotation to ensure that the exact command line is executed without
+# any globbing, substitution, or shell interpretation on the remote
+# side. Does not support --verbose or --quiet. Does not include
+# "$host: " prefixes on output.
+#
+# usage: do_facet_vp "$facet" "$command" "$arg"...
+do_facet_vp() {
+ local facet="$1"
+ local host=$(facet_active_host "$facet")
+ shift
+
+ if [[ -z "$host" ]]; then
+ echo "no host defined for facet ${facet}" >&2
+ exit 1
+ fi
+
+ do_node_vp "$host" "$@"
}
# Function: do_facet_random_file $FACET $FILE $SIZE
}
do_nodesv() {
- do_nodes --verbose "$@"
+ do_nodes --verbose "$@"
}
add() {
stop ${facet} -f
rm -f $TMP/${facet}active
[[ $facet = mds1 ]] && combined_mgs_mds && rm -f $TMP/mgsactive
+
+ # make sure in-tree ldiskfs is loaded before mkfs
+ if local_mode && [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then
+ load_module ../ldiskfs/ldiskfs
+ fi
+
do_facet ${facet} $MKFS $* || return ${PIPESTATUS[0]}
if [[ $(facet_fstype $facet) == zfs ]]; then
error "unknown fstype!";;
esac
- echo -n $DEVPTR
+ echo -n $DEVPTR
}
# Physical device location of data
}
facet_mntpt () {
- local facet=$1
- [[ $facet = mgs ]] && combined_mgs_mds && facet="mds1"
+ local facet=$1
+ [[ $facet = mgs ]] && combined_mgs_mds && facet="mds1"
- local var=${facet}_MOUNT
- eval mntpt=${!var:-${MOUNT}-$facet}
+ local var=${facet}_MOUNT
+ eval mntpt=${!var:-${MOUNT}-$facet}
- echo -n $mntpt
+ echo -n $mntpt
}
mount_ldiskfs() {
# The add fn does rm ${facet}active file, this would be enough
# if we use do_facet <facet> only after the facet added, but
# currently we use do_facet mds in local.sh
+ local num
for num in `seq $MDSCOUNT`; do
stop mds$num -f
rm -f ${TMP}/mds${num}active
local opts
local fs_mkfs_opts
local var
+ local varbs=${facet}_BLOCKSIZE
if [ $type == MGS ] || ( [ $type == MDS ] &&
[ "$dev" == $(mgsdevname) ] &&
opts+=${LDLM_TIMEOUT:+" --param=sys.ldlm_timeout=$LDLM_TIMEOUT"}
if [ $type == MDS ]; then
- opts+=${MDSCAPA:+" --param-mdt.capa=$MDSCAPA"}
opts+=${DEF_STRIPE_SIZE:+" --param=lov.stripesize=$DEF_STRIPE_SIZE"}
opts+=${DEF_STRIPE_COUNT:+" --param=lov.stripecount=$DEF_STRIPE_COUNT"}
opts+=${L_GETIDENTITY:+" --param=mdt.identity_upcall=$L_GETIDENTITY"}
if [ $fstype == ldiskfs ]; then
- fs_mkfs_opts+="-O ea_inode,large_dir"
-
var=${facet}_JRN
if [ -n "${!var}" ]; then
fs_mkfs_opts+=" -J device=${!var}"
fi
if [ $type == OST ]; then
- opts+=${OSSCAPA:+" --param=ost.capa=$OSSCAPA"}
-
if [ $fstype == ldiskfs ]; then
var=${facet}_JRN
if [ -n "${!var}" ]; then
var=${type}_FS_MKFS_OPTS
fs_mkfs_opts+=${!var:+" ${!var}"}
+ [[ "$QUOTA_TYPE" =~ "p" ]] && fs_mkfs_opts+=" -O project"
+
+ [ $fstype == ldiskfs ] && fs_mkfs_opts+=" -b ${!varbs:-$BLCKSIZE}"
[ $fstype == ldiskfs ] && fs_mkfs_opts=$(squash_opt $fs_mkfs_opts)
if [ -n "${fs_mkfs_opts## }" ]; then
grep " $1 " /proc/mounts && zconf_umount $HOSTNAME $*
}
-# return value:
-# 0: success, the old identity set already.
-# 1: success, the old identity does not set.
+# usage: switch_identity MDSNUM ENABLE_UPCALL
+#
+# return values:
+# 0: success, the identity upcall was previously enabled already.
+# 1: success, the identity upcall was previously disabled.
# 2: fail.
switch_identity() {
- local num=$1
- local switch=$2
- local j=`expr $num - 1`
- local MDT="`(do_facet mds$num lctl get_param -N mdt.*MDT*$j 2>/dev/null | cut -d"." -f2 2>/dev/null) || true`"
+ local num=$1
+ local enable=$2
+ local facet=mds$num
+ local MDT="$(mdtname_from_index $((num - 1)) $MOUNT)"
+ local upcall="$L_GETIDENTITY"
- if [ -z "$MDT" ]; then
- return 2
- fi
+ [[ -n "$MDT" ]] || return 2
- local old="`do_facet mds$num "lctl get_param -n mdt.$MDT.identity_upcall"`"
+ local param="mdt.$MDT.identity_upcall"
+ local old="$(do_facet $facet "lctl get_param -n $param")"
- if $switch; then
- do_facet mds$num "lctl set_param -n mdt.$MDT.identity_upcall \"$L_GETIDENTITY\""
- else
- do_facet mds$num "lctl set_param -n mdt.$MDT.identity_upcall \"NONE\""
- fi
+ [[ "$enable" == "true" ]] || upcall="NONE"
- do_facet mds$num "lctl set_param -n mdt/$MDT/identity_flush=-1"
+ do_facet $facet "lctl set_param -n $param='$upcall'" || return 2
+ do_facet $facet "lctl set_param -n mdt.$MDT.identity_flush=-1"
- if [ $old = "NONE" ]; then
- return 1
- else
- return 0
- fi
+ [[ "$old" != "NONE" ]] # implicit "&& return 0 || return 1"
}
remount_client()
eval $varname=$host
fi
done
- if [ $IDENTITY_UPCALL != "default" ]; then
+ if [[ "$IDENTITY_UPCALL" != "default" ]]; then
switch_identity $num $IDENTITY_UPCALL
fi
done
}
+unmountoss() {
+ local num
+
+ for num in $(seq $OSTCOUNT); do
+ stop ost$num -f
+ rm -f $TMP/ost${num}active
+ done
+}
+
mountoss() {
local num
local devname
shift
eval export ${facet}_dev=${device}
- eval export ${facet}_opt=\"$@\"
+ eval export ${facet}_opt=\"$*\"
local dev=${facet}_dev
local varname=${facet}failover_HOST
if [ -z "${!varname}" ]; then
- eval export $varname=$(facet_host $facet)
+ local temp
+ if combined_mgs_mds && [ $facet == "mgs" ] &&
+ [ -n "$mds1failover_HOST" ]; then
+ temp=$mds1failover_HOST
+ else
+ temp=$(facet_host $facet)
+ fi
+ eval export $varname=$temp
fi
varname=${facet}_HOST
if ! remote_mds_nodsh; then
for num in $(seq $MDSCOUNT); do
- DEVNAME=`mdsdevname $num`
+ DEVNAME=$(mdsdevname $num)
init_facet_vars mds$num $DEVNAME $MDS_MOUNT_OPTS
done
fi
- combined_mgs_mds || init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS
+ init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS
if ! remote_ost_nodsh; then
for num in $(seq $OSTCOUNT); do
}
osc_ensure_active () {
- local facet=$1
- local timeout=$2
- local period=0
+ local facet=$1
+ local timeout=$2
+ local period=0
- while [ $period -lt $timeout ]; do
- count=$(do_facet $facet "lctl dl | grep ' IN osc ' 2>/dev/null | wc -l")
- if [ $count -eq 0 ]; then
- break
- fi
+ while [ $period -lt $timeout ]; do
+ count=$(do_facet $facet "lctl dl | grep ' IN osc ' 2>/dev/null | wc -l")
+ if [ $count -eq 0 ]; then
+ break
+ fi
- echo "There are $count OST are inactive, wait $period seconds, and try again"
- sleep 3
- period=$((period+3))
- done
+ echo "$count OST inactive, wait $period seconds, and try again"
+ sleep 3
+ period=$((period+3))
+ done
- [ $period -lt $timeout ] || log "$count OST are inactive after $timeout seconds, give up"
+ [ $period -lt $timeout ] ||
+ log "$count OST are inactive after $timeout seconds, give up"
}
set_conf_param_and_check() {
TIMEOUT=$(do_facet $SINGLEMDS "lctl get_param -n timeout")
log "Using TIMEOUT=$TIMEOUT"
+ # tune down to speed up testing on (usually) small setups
+ local mgc_timeout=/sys/module/mgc/parameters/mgc_requeue_timeout_min
+ do_nodes $(comma_list $(nodes_list)) \
+ "[ -f $mgc_timeout ] && echo 1 > $mgc_timeout; exit 0"
+
osc_ensure_active $SINGLEMDS $TIMEOUT
osc_ensure_active client $TIMEOUT
$LCTL set_param osc.*.idle_timeout=debug
fi
(( MDS1_VERSION <= $(version_code 2.13.52) )) ||
- do_nodes $(comma_list $(mdts_nodes)) \
- "$LCTL set_param lod.*.mdt_hash=crush"
+ do_facet mgs "$LCTL set_param -P lod.*.mdt_hash=crush"
return 0
}
nfs_client_mode () {
- if [ "$NFSCLIENT" ]; then
- echo "NFSCLIENT mode: setup, cleanup, check config skipped"
- local clients=$CLIENTS
- [ -z $clients ] && clients=$(hostname)
-
- # FIXME: remove hostname when 19215 fixed
- do_nodes $clients "echo \\\$(hostname); grep ' '$MOUNT' ' /proc/mounts"
- declare -a nfsexport=(`grep ' '$MOUNT' ' /proc/mounts | awk '{print $1}' | awk -F: '{print $1 " " $2}'`)
- if [[ ${#nfsexport[@]} -eq 0 ]]; then
- error_exit NFSCLIENT=$NFSCLIENT mode, but no NFS export found!
- fi
- do_nodes ${nfsexport[0]} "echo \\\$(hostname); df -T ${nfsexport[1]}"
- return
- fi
- return 1
+ if [ "$NFSCLIENT" ]; then
+ echo "NFSCLIENT mode: setup, cleanup, check config skipped"
+ local clients=$CLIENTS
+
+ [ -z $clients ] && clients=$(hostname)
+
+ # FIXME: remove hostname when 19215 fixed
+ do_nodes $clients "echo \\\$(hostname); grep ' '$MOUNT' ' /proc/mounts"
+ declare -a nfsexport=(`grep ' '$MOUNT' ' /proc/mounts |
+ awk '{print $1}' | awk -F: '{print $1 " " $2}'`)
+ if [[ ${#nfsexport[@]} -eq 0 ]]; then
+ error_exit NFSCLIENT=$NFSCLIENT mode, but no NFS export found!
+ fi
+ do_nodes ${nfsexport[0]} "echo \\\$(hostname); df -T ${nfsexport[1]}"
+ return
+ fi
+ return 1
}
cifs_client_mode () {
}
check_config_client () {
- local mntpt=$1
-
- local mounted=$(mount | grep " $mntpt ")
- if [ -n "$CLIENTONLY" ]; then
- # bug 18021
- # CLIENTONLY should not depend on *_HOST settings
- local mgc=$($LCTL device_list | awk '/MGC/ {print $4}')
- # in theory someone could create a new,
- # client-only config file that assumed lustre was already
- # configured and didn't set the MGSNID. If MGSNID is not set,
- # then we should use the mgs nid currently being used
- # as the default value. bug 18021
- [[ x$MGSNID = x ]] &&
- MGSNID=${mgc//MGC/}
-
- if [[ x$mgc != xMGC$MGSNID ]]; then
- if [ "$mgs_HOST" ]; then
- local mgc_ip=$(ping -q -c1 -w1 $mgs_HOST | grep PING | awk '{print $3}' | sed -e "s/(//g" -e "s/)//g")
-# [[ x$mgc = xMGC$mgc_ip@$NETTYPE ]] ||
-# error_exit "MGSNID=$MGSNID, mounted: $mounted, MGC : $mgc"
- fi
- fi
- return 0
- fi
+ local mntpt=$1
+ local mounted=$(mount | grep " $mntpt ")
+
+ if [ -n "$CLIENTONLY" ]; then
+ # bug 18021
+ # CLIENTONLY should not depend on *_HOST settings
+ local mgc=$($LCTL device_list | awk '/MGC/ {print $4}')
+ # in theory someone could create a new,
+ # client-only config file that assumed lustre was already
+ # configured and didn't set the MGSNID. If MGSNID is not set,
+ # then we should use the mgs nid currently being used
+ # as the default value. bug 18021
+ [[ x$MGSNID = x ]] &&
+ MGSNID=${mgc//MGC/}
+
+ if [[ x$mgc != xMGC$MGSNID ]]; then
+ if [ "$mgs_HOST" ]; then
+ local mgc_ip=$(ping -q -c1 -w1 $mgs_HOST |
+ grep PING | awk '{print $3}' |
+ sed -e "s/(//g" -e "s/)//g")
+
+ # [[ x$mgc = xMGC$mgc_ip@$NETTYPE ]] ||
+ # error_exit "MGSNID=$MGSNID, mounted: $mounted, MGC : $mgc"
+ fi
+ fi
+ return 0
+ fi
- echo Checking config lustre mounted on $mntpt
- local mgshost=$(mount | grep " $mntpt " | awk -F@ '{print $1}')
- mgshost=$(echo $mgshost | awk -F: '{print $1}')
+ echo Checking config lustre mounted on $mntpt
+ local mgshost=$(mount | grep " $mntpt " | awk -F@ '{print $1}')
+ mgshost=$(echo $mgshost | awk -F: '{print $1}')
}
}
check_timeout () {
- local mdstimeout=$(do_facet $SINGLEMDS "lctl get_param -n timeout")
- local cltimeout=$(lctl get_param -n timeout)
- if [ $mdstimeout -ne $TIMEOUT ] || [ $mdstimeout -ne $cltimeout ]; then
- error "timeouts are wrong! mds: $mdstimeout, client: $cltimeout, TIMEOUT=$TIMEOUT"
- return 1
- fi
+ local mdstimeout=$(do_facet $SINGLEMDS "lctl get_param -n timeout")
+ local cltimeout=$(lctl get_param -n timeout)
+ if [ $mdstimeout -ne $TIMEOUT ] || [ $mdstimeout -ne $cltimeout ]; then
+ error "timeouts are wrong! mds: $mdstimeout, client: $cltimeout, TIMEOUT=$TIMEOUT"
+ return 1
+ fi
}
is_mounted () {
echo $mounted' ' | grep -w -q $mntpt' '
}
-is_empty_dir() {
- [ $(find $1 -maxdepth 1 -print | wc -l) = 1 ] && return 0
- return 1
+create_pools () {
+ local pool=$1
+ local ostsn=${2:-$OSTCOUNT}
+ local npools=${FS_NPOOLS:-$((OSTCOUNT / ostsn))}
+ local n
+
+ echo ostsn=$ostsn npools=$npools
+ if [[ $ostsn -gt $OSTCOUNT ]]; then
+ echo "request to use $ostsn OSTs in the pool, \
+ using max available OSTCOUNT=$OSTCOUNT"
+ ostsn=$OSTCOUNT
+ fi
+ for (( n=0; n < $npools; n++ )); do
+ p=${pool}$n
+ if ! $DELETE_OLD_POOLS; then
+ log "request to not delete old pools: $FSNAME.$p exist?"
+ if ! check_pool_not_exist $FSNAME.$p; then
+ echo "Using existing $FSNAME.$p"
+ $LCTL pool_list $FSNAME.$p
+ continue
+ fi
+ fi
+ create_pool $FSNAME.$p $KEEP_POOLS ||
+ error "create_pool $FSNAME.$p failed"
+
+ local first=$(( (n * ostsn) % OSTCOUNT ))
+ local last=$(( (first + ostsn - 1) % OSTCOUNT ))
+ if [[ $first -le $last ]]; then
+ pool_add_targets $p $first $last ||
+ error "pool_add_targets $p $first $last failed"
+ else
+ pool_add_targets $p $first $(( OSTCOUNT - 1 )) ||
+ error "pool_add_targets $p $first \
+ $(( OSTCOUNT - 1 )) failed"
+ pool_add_targets $p 0 $last ||
+ error "pool_add_targets $p 0 $last failed"
+ fi
+ done
}
-# empty lustre filesystem may have empty directories lost+found and .lustre
-is_empty_fs() {
- # exclude .lustre & lost+found
- [ $(find $1 -maxdepth 1 -name lost+found -o -name .lustre -prune -o \
- -print | wc -l) = 1 ] || return 1
- [ ! -d $1/lost+found ] || is_empty_dir $1/lost+found || return 1
- if [ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.4.0) ]; then
- # exclude .lustre/fid (LU-2780)
- [ $(find $1/.lustre -maxdepth 1 -name fid -prune -o \
- -print | wc -l) = 1 ] || return 1
- else
- [ ! -d $1/.lustre ] || is_empty_dir $1/.lustre || return 1
- fi
- return 0
+set_pools_quota () {
+ local u
+ local o
+ local p
+ local i
+ local j
+
+ [[ $ENABLE_QUOTA ]] || error "Required Pool Quotas: \
+ $POOLS_QUOTA_USERS_SET, but ENABLE_QUOTA not set!"
+
+ # POOLS_QUOTA_USERS_SET=
+ # "quota15_1:20M -- for all of the found pools
+ # quota15_2:1G:gpool0
+ # quota15_3 -- for global limit only
+ # quota15_4:200M:gpool0
+ # quota15_4:200M:gpool1"
+
+ declare -a pq_userset=(${POOLS_QUOTA_USERS_SET="mpiuser"})
+ declare -a pq_users
+ declare -A pq_limits
+
+ for ((i=0; i<${#pq_userset[@]}; i++)); do
+ u=${pq_userset[i]%%:*}
+ o=""
+ # user gets no pool limits if
+ # POOLS_QUOTA_USERS_SET does not specify it
+ [[ ${pq_userset[i]} =~ : ]] && o=${pq_userset[i]##$u:}
+ pq_limits[$u]+=" $o"
+ done
+ pq_users=(${!pq_limits[@]})
+
+ declare -a opts
+ local pool
+
+ for ((i=0; i<${#pq_users[@]}; i++)); do
+ u=${pq_users[i]}
+ # set to max limit (_u64)
+ $LFS setquota -u $u -B $((2**24 - 1))T $DIR
+ opts=(${pq_limits[$u]})
+ for ((j=0; j<${#opts[@]}; j++)); do
+ p=${opts[j]##*:}
+ o=${opts[j]%%:*}
+ # Set limit for all existing pools if
+ # no pool specified
+ if [ $p == $o ]; then
+ p=$(list_pool $FSNAME | sed "s/$FSNAME.//")
+ echo "No pool specified for $u,
+ set limit $o for all existing pools"
+ fi
+ for pool in $p; do
+ $LFS setquota -u $u -B $o --pool $pool $DIR ||
+ error "setquota -u $u -B $o \
+ --pool $pool failed"
+ done
+ done
+ $LFS quota -uv $u --pool $DIR
+ done
}
-check_and_setup_lustre() {
+do_check_and_setup_lustre() {
+ # If auster does not want us to setup, then don't.
+ ! ${do_setup} && return
+
+ echo "=== $TESTSUITE: start setup $(date +'%H:%M:%S (%s)') ==="
+
sanitize_parameters
nfs_client_mode && return
cifs_client_mode && return
is_mounted $MOUNT || error "NAME=$NAME not mounted"
export I_MOUNTED=yes
do_check=false
- # 2.
- # MOUNT2 is mounted
- elif is_mounted $MOUNT2; then
- # 3.
- # MOUNT2 is mounted, while MOUNT_2 is not set
- if ! [ "$MOUNT_2" ]; then
- cleanup_mount $MOUNT2
- export I_UMOUNTED2=yes
-
- # 4.
- # MOUNT2 is mounted, MOUNT_2 is set
- else
- # FIXME: what to do if check_config failed?
- # i.e. if:
- # 1) remote client has mounted other Lustre fs ?
- # 2) it has insane env ?
- # let's try umount MOUNT2 on all clients and mount it again:
- if ! check_config_clients $MOUNT2; then
- cleanup_mount $MOUNT2
- restore_mount $MOUNT2
- export I_MOUNTED2=yes
- fi
- fi
-
- # 5.
- # MOUNT is mounted MOUNT2 is not mounted
- elif [ "$MOUNT_2" ]; then
- restore_mount $MOUNT2
- export I_MOUNTED2=yes
- fi
+ # 2.
+ # MOUNT2 is mounted
+ elif is_mounted $MOUNT2; then
+ # 3.
+ # MOUNT2 is mounted, while MOUNT_2 is not set
+ if ! [ "$MOUNT_2" ]; then
+ cleanup_mount $MOUNT2
+ export I_UMOUNTED2=yes
+
+ # 4.
+ # MOUNT2 is mounted, MOUNT_2 is set
+ else
+ # FIXME: what to do if check_config failed?
+ # i.e. if:
+ # 1) remote client has mounted other Lustre fs ?
+ # 2) it has insane env ?
+ # try to umount MOUNT2 on all clients and mount again:
+ if ! check_config_clients $MOUNT2; then
+ cleanup_mount $MOUNT2
+ restore_mount $MOUNT2
+ export I_MOUNTED2=yes
+ fi
+ fi
+ # 5.
+ # MOUNT is mounted MOUNT2 is not mounted
+ elif [ "$MOUNT_2" ]; then
+ restore_mount $MOUNT2
+ export I_MOUNTED2=yes
+ fi
- if $do_check; then
- # FIXME: what to do if check_config failed?
- # i.e. if:
- # 1) remote client has mounted other Lustre fs?
- # 2) lustre is mounted on remote_clients atall ?
- check_config_clients $MOUNT
- init_facets_vars
- init_param_vars
+ if $do_check; then
+ # FIXME: what to do if check_config failed?
+ # i.e. if:
+ # 1) remote client has mounted other Lustre fs?
+		# 2) lustre is mounted on remote_clients at all ?
+ check_config_clients $MOUNT
+ init_facets_vars
+ init_param_vars
- set_default_debug_nodes $(comma_list $(nodes_list))
- fi
+ set_default_debug_nodes $(comma_list $(nodes_list))
+ set_params_clients
+ fi
if [ -z "$CLIENTONLY" -a $(lower $OSD_TRACK_DECLARES_LBUG) == 'yes' ]; then
local facets=""
if [ -n "$fs_STRIPEPARAMS" ]; then
setstripe_getstripe $MOUNT $fs_STRIPEPARAMS
fi
+
if $GSS_SK; then
set_flavor_all null
elif $GSS; then
set_flavor_all $SEC
fi
- if [ "$ONLY" == "setup" ]; then
+ if $DELETE_OLD_POOLS; then
+ destroy_all_pools
+ fi
+
+ if [[ -n "$FS_POOL" ]]; then
+ create_pools $FS_POOL $FS_POOL_NOSTS
+ fi
+
+ if [[ -n "$POOLS_QUOTA_USERS_SET" ]]; then
+ set_pools_quota
+ fi
+
+ # set tunable parameters passed to test environment
+ set_params_clients
+ set_params_mdts
+ set_params_osts
+ echo "=== $TESTSUITE: finish setup $(date +'%H:%M:%S (%s)') ==="
+
+ if [[ "$ONLY" == "setup" ]]; then
exit 0
fi
}
+check_and_setup_lustre() {
+ local start_stamp=$(date +%s)
+ local saved_umask=$(umask)
+ local log=$TESTLOG_PREFIX.test_setup.test_log.$(hostname -s).log
+ local status='PASS'
+ local stop_stamp=0
+ local duration=0
+ local error=''
+ local rc=0
+
+ umask 0022
+
+ log_sub_test_begin test_setup
+
+ if ! do_check_and_setup_lustre 2>&1 > >(tee -i $log); then
+ error=$(tail -1 $log)
+ status='FAIL'
+ rc=1
+ fi
+
+ stop_stamp=$(date +%s)
+ duration=$((stop_stamp - start_stamp))
+
+ log_sub_test_end "$status" "$duration" "$rc" "$error"
+
+ umask $saved_umask
+
+ return $rc
+}
+
restore_mount () {
- local clients=${CLIENTS:-$HOSTNAME}
- local mntpt=$1
+ local clients=${CLIENTS:-$HOSTNAME}
+ local mntpt=$1
- zconf_mount_clients $clients $mntpt
+ zconf_mount_clients $clients $mntpt
}
cleanup_mount () {
}
cleanup_and_setup_lustre() {
- if [ "$ONLY" == "cleanup" -o "`mount | grep $MOUNT`" ]; then
- lctl set_param debug=0 || true
- cleanupall
- if [ "$ONLY" == "cleanup" ]; then
- exit 0
- fi
- fi
- check_and_setup_lustre
-}
+ if [[ "$ONLY" == "cleanup" ]] || grep -q "$MOUNT" /proc/mounts; then
+ lctl set_param debug=0 || true
+ cleanupall
-# Get all of the server target devices from a given server node and type.
-get_mnt_devs() {
- local node=$1
- local type=$2
- local devs
- local dev
-
- if [ "$type" == ost ]; then
- devs=$(get_osd_param $node "" mntdev)
- else
- devs=$(do_node $node $LCTL get_param -n osd-*.$FSNAME-M*.mntdev)
+ if [[ "$ONLY" == "cleanup" ]]; then
+ exit 0
+ fi
fi
- for dev in $devs; do
- case $dev in
- *loop*) do_node $node "losetup $dev" | \
- sed -e "s/.*(//" -e "s/).*//" ;;
- *) echo $dev ;;
- esac
- done
-}
-
-# Get all of the server target devices.
-get_svr_devs() {
- local node
- local i
- # Master MDS parameters used by lfsck
- MDTNODE=$(facet_active_host $SINGLEMDS)
- MDTDEV=$(echo $(get_mnt_devs $MDTNODE mdt) | awk '{print $1}')
-
- # MDT devices
- i=0
- for node in $(mdts_nodes); do
- MDTDEVS[i]=$(get_mnt_devs $node mdt)
- i=$((i + 1))
- done
-
- # OST devices
- i=0
- for node in $(osts_nodes); do
- OSTDEVS[i]=$(get_mnt_devs $node ost)
- i=$((i + 1))
- done
+ do_check_and_setup_lustre
}
# Run e2fsck on MDT or OST device.
local log=$TMP/e2fsck.log
local rc=0
+ # turn on pfsck if it is supported
+ do_node $node $E2FSCK -h 2>&1 | grep -qw -- -m && cmd+=" -m8"
echo $cmd
do_node $node $cmd 2>&1 | tee $log
rc=${PIPESTATUS[0]}
fi
}
-check_and_cleanup_lustre() {
- if [ "$LFSCK_ALWAYS" = "yes" -a "$TESTSUITE" != "sanity-lfsck" -a \
- "$TESTSUITE" != "sanity-scrub" ]; then
+do_check_and_cleanup_lustre() {
+ echo "=== $TESTSUITE: start cleanup $(date +'%H:%M:%S (%s)') ==="
+
+ if [[ "$LFSCK_ALWAYS" == "yes" && "$TESTSUITE" != "sanity-lfsck" && \
+ "$TESTSUITE" != "sanity-scrub" ]]; then
run_lfsck
fi
if is_mounted $MOUNT; then
if $DO_CLEANUP; then
- [ -n "$DIR" ] && rm -rf $DIR/[Rdfs][0-9]* ||
+ [[ -n "$DIR" ]] && rm -rf $DIR/[Rdfs][0-9]* ||
error "remove sub-test dirs failed"
else
echo "skip cleanup"
fi
- [ "$ENABLE_QUOTA" ] && restore_quota || true
+ [[ -n "$ENABLE_QUOTA" ]] && restore_quota || true
fi
- if [ "$I_UMOUNTED2" = "yes" ]; then
+ if [[ "$I_UMOUNTED2" == "yes" ]]; then
restore_mount $MOUNT2 || error "restore $MOUNT2 failed"
fi
- if [ "$I_MOUNTED2" = "yes" ]; then
+ if [[ "$I_MOUNTED2" == "yes" ]]; then
cleanup_mount $MOUNT2
fi
- if [[ "$I_MOUNTED" = "yes" ]] && ! $AUSTER_CLEANUP; then
+ if [[ "$I_MOUNTED" == "yes" ]] && ! $AUSTER_CLEANUP; then
cleanupall -f || error "cleanup failed"
unset I_MOUNTED
fi
+
+ echo "=== $TESTSUITE: finish cleanup $(date +'%H:%M:%S (%s)') ==="
+}
+
+check_and_cleanup_lustre() {
+ local start_stamp=$(date +%s)
+ local saved_umask=$(umask)
+ local log=$TESTLOG_PREFIX.test_cleanup.test_log.$(hostname -s).log
+ local status='PASS'
+ local stop_stamp=0
+ local duration=0
+ local error=''
+ local rc=0
+
+ umask 0022
+
+ log_sub_test_begin test_cleanup
+
+ if ! do_check_and_cleanup_lustre 2>&1 > >(tee -i $log); then
+ error=$(tail -1 $log)
+ status='FAIL'
+ rc=1
+ fi
+
+ stop_stamp=$(date +%s)
+ duration=$((stop_stamp - start_stamp))
+
+ log_sub_test_end "$status" "$duration" "$rc" "$error"
+
+ umask $saved_umask
+
+ return $rc
}
#######
# General functions
wait_for_function () {
- local quiet=""
+ local quiet=""
- # suppress fn both stderr and stdout
- if [ "$1" = "--quiet" ]; then
- shift
- quiet=" > /dev/null 2>&1"
-
- fi
+ # suppress fn both stderr and stdout
+ if [ "$1" = "--quiet" ]; then
+ shift
+ quiet=" > /dev/null 2>&1"
+ fi
- local fn=$1
- local max=${2:-900}
- local sleep=${3:-5}
+ local fn=$1
+ local max=${2:-900}
+ local sleep=${3:-5}
- local wait=0
+ local wait=0
- while true; do
+ while true; do
- eval $fn $quiet && return 0
+ eval $fn $quiet && return 0
- wait=$((wait + sleep))
- [ $wait -lt $max ] || return 1
- echo waiting $fn, $((max - wait)) secs left ...
- sleep $sleep
- done
+ [ $wait -lt $max ] || return 1
+ echo waiting $fn, $((max - wait)) secs left ...
+ wait=$((wait + sleep))
+ [ $wait -gt $max ] && ((sleep -= wait - max))
+ sleep $sleep
+ done
}
check_network() {
}
no_dsh() {
- shift
- eval $@
+ shift
+ eval "$@"
}
# Convert a space-delimited list to a comma-delimited list. If the input is
}
list_member () {
- local list=$1
- local item=$2
- echo $list | grep -qw $item
+ local list=$1
+ local item=$2
+ echo $list | grep -qw $item
}
# list, excluded are the comma separated lists
exclude_items_from_list () {
- local list=$1
- local excluded=$2
- local item
+ local list=$1
+ local excluded=$2
+ local item
- list=${list//,/ }
- for item in ${excluded//,/ }; do
- list=$(echo " $list " | sed -re "s/\s+$item\s+/ /g")
- done
- echo $(comma_list $list)
+ list=${list//,/ }
+ for item in ${excluded//,/ }; do
+ list=$(echo " $list " | sed -re "s/\s+$item\s+/ /g")
+ done
+ echo $(comma_list $list)
}
# list, expand are the comma separated lists
expand_list () {
- local list=${1//,/ }
- local expand=${2//,/ }
- local expanded=
+ local list=${1//,/ }
+ local expand=${2//,/ }
+ local expanded=
- expanded=$(for i in $list $expand; do echo $i; done | sort -u)
- echo $(comma_list $expanded)
+ expanded=$(for i in $list $expand; do echo $i; done | sort -u)
+ echo $(comma_list $expanded)
}
testslist_filter () {
- local script=$LUSTRE/tests/${TESTSUITE}.sh
+ local script=$LUSTRE/tests/${TESTSUITE}.sh
- [ -f $script ] || return 0
+ [ -f $script ] || return 0
- local start_at=$START_AT
- local stop_at=$STOP_AT
+ local start_at=$START_AT
+ local stop_at=$STOP_AT
- local var=${TESTSUITE//-/_}_START_AT
- [ x"${!var}" != x ] && start_at=${!var}
- var=${TESTSUITE//-/_}_STOP_AT
- [ x"${!var}" != x ] && stop_at=${!var}
+ local var=${TESTSUITE//-/_}_START_AT
+ [ x"${!var}" != x ] && start_at=${!var}
+ var=${TESTSUITE//-/_}_STOP_AT
+ [ x"${!var}" != x ] && stop_at=${!var}
- sed -n 's/^test_\([^ (]*\).*/\1/p' $script | \
+ sed -n 's/^test_\([^ (]*\).*/\1/p' $script |
awk ' BEGIN { if ("'${start_at:-0}'" != 0) flag = 1 }
/^'${start_at}'$/ {flag = 0}
{if (flag == 1) print $0}
}
absolute_path() {
- (cd `dirname $1`; echo $PWD/`basename $1`)
+ (cd `dirname $1`; echo $PWD/`basename $1`)
}
get_facets () {
- local types=${1:-"OST MDS MGS"}
-
- local list=""
-
- for entry in $types; do
- local name=$(echo $entry | tr "[:upper:]" "[:lower:]")
- local type=$(echo $entry | tr "[:lower:]" "[:upper:]")
-
- case $type in
- MGS ) list="$list $name";;
- MDS|OST|AGT ) local count=${type}COUNT
- for ((i=1; i<=${!count}; i++)) do
- list="$list ${name}$i"
- done;;
- * ) error "Invalid facet type"
- exit 1;;
- esac
- done
- echo $(comma_list $list)
+ local types=${1:-"OST MDS MGS"}
+
+ local list=""
+
+ for entry in $types; do
+ local name=$(echo $entry | tr "[:upper:]" "[:lower:]")
+ local type=$(echo $entry | tr "[:lower:]" "[:upper:]")
+
+ case $type in
+ MGS ) list="$list $name";;
+ MDS|OST|AGT ) local count=${type}COUNT
+ for ((i=1; i<=${!count}; i++)) do
+ list="$list ${name}$i"
+ done;;
+ * ) error "Invalid facet type"
+ exit 1;;
+ esac
+ done
+ echo $(comma_list $list)
}
##################################
# Adaptive Timeouts funcs
at_is_enabled() {
- # only check mds, we assume at_max is the same on all nodes
- local at_max=$(do_facet $SINGLEMDS "lctl get_param -n at_max")
- if [ $at_max -eq 0 ]; then
- return 1
- else
- return 0
- fi
+ # only check mds, we assume at_max is the same on all nodes
+ local at_max=$(do_facet $SINGLEMDS "lctl get_param -n at_max")
+
+ if [ $at_max -eq 0 ]; then
+ return 1
+ else
+ return 0
+ fi
}
at_get() {
- local facet=$1
- local at=$2
+ local facet=$1
+ local at=$2
- # suppose that all ost-s have the same $at value set
- [ $facet != "ost" ] || facet=ost1
+ # suppose that all ost-s have the same $at value set
+ [ $facet != "ost" ] || facet=ost1
- do_facet $facet "lctl get_param -n $at"
+ do_facet $facet "lctl get_param -n $at"
}
at_max_get() {
- at_get $1 at_max
+ at_get $1 at_max
+}
+
+at_max_set() {
+ local at_max=$1
+ shift
+
+ local facet
+ local hosts
+
+ for facet in "$@"; do
+ if [ $facet == "ost" ]; then
+ facet=$(get_facets OST)
+ elif [ $facet == "mds" ]; then
+ facet=$(get_facets MDS)
+ fi
+ hosts=$(expand_list $hosts $(facets_hosts $facet))
+ done
+
+ do_nodes $hosts lctl set_param at_max=$at_max
}
at_min_get() {
at_get $1 at_min
}
-at_max_set() {
- local at_max=$1
- shift
+at_min_set() {
+ local at_min=$1
+ shift
- local facet
- local hosts
- for facet in $@; do
- if [ $facet == "ost" ]; then
- facet=$(get_facets OST)
- elif [ $facet == "mds" ]; then
- facet=$(get_facets MDS)
- fi
- hosts=$(expand_list $hosts $(facets_hosts $facet))
- done
+ local facet
+ local hosts
+
+ for facet in "$@"; do
+ if [ $facet == "ost" ]; then
+ facet=$(get_facets OST)
+ elif [ $facet == "mds" ]; then
+ facet=$(get_facets MDS)
+ fi
+ hosts=$(expand_list $hosts $(facets_hosts $facet))
+ done
- do_nodes $hosts lctl set_param at_max=$at_max
+ do_nodes $hosts lctl set_param at_min=$at_min
}
##################################
drop_request() {
# OBD_FAIL_MDS_ALL_REQUEST_NET
- RC=0
- do_facet $SINGLEMDS lctl set_param fail_val=0 fail_loc=0x123
- do_facet client "$1" || RC=$?
- do_facet $SINGLEMDS lctl set_param fail_loc=0
- return $RC
+ RC=0
+ do_facet $SINGLEMDS lctl set_param fail_val=0 fail_loc=0x123
+ do_facet client "$1" || RC=$?
+ do_facet $SINGLEMDS lctl set_param fail_loc=0
+ return $RC
}
drop_reply() {
drop_mdt_ldlm_reply() {
#define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157
- RC=0
- local list=$(comma_list $(mdts_nodes))
- do_nodes $list lctl set_param fail_loc=0x157
+ RC=0
+ local list=$(comma_list $(mdts_nodes))
- do_facet client "$@" || RC=$?
+ do_nodes $list lctl set_param fail_loc=0x157
- do_nodes $list lctl set_param fail_loc=0
- return $RC
+ do_facet client "$@" || RC=$?
+
+ do_nodes $list lctl set_param fail_loc=0
+ return $RC
}
drop_mdt_ldlm_reply_once() {
#define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157
- RC=0
- local list=$(comma_list $(mdts_nodes))
- do_nodes $list lctl set_param fail_loc=0x80000157
+ RC=0
+ local list=$(comma_list $(mdts_nodes))
- do_facet client "$@" || RC=$?
+ do_nodes $list lctl set_param fail_loc=0x80000157
- do_nodes $list lctl set_param fail_loc=0
- return $RC
+ do_facet client "$@" || RC=$?
+
+ do_nodes $list lctl set_param fail_loc=0
+ return $RC
}
clear_failloc() {
- facet=$1
- pause=$2
- sleep $pause
- echo "clearing fail_loc on $facet"
- do_facet $facet "lctl set_param fail_loc=0 2>/dev/null || true"
+ facet=$1
+ pause=$2
+ sleep $pause
+ echo "clearing fail_loc on $facet"
+ do_facet $facet "lctl set_param fail_loc=0 2>/dev/null || true"
}
set_nodes_failloc () {
do_nodes $(comma_list $1) lctl set_param fail_val=$fv fail_loc=$2
}
+# Print the total of the lock_unused_count across all namespaces containing the
+# given wildcard. If the namespace wildcard is omitted, all namespaces will be
+# matched.
+# Usage: total_unused_locks [namespace_wildcard]
+total_unused_locks() {
+ $LCTL get_param -n "ldlm.namespaces.*$1*.lock_unused_count" | calc_sum
+}
+
+# Print the total of the lock_count across all namespaces containing the given
+# wildcard. If the namespace wildcard is omitted, all namespaces will be matched.
+# Usage: total_used_locks [namespace_wildcard]
+total_used_locks() {
+ $LCTL get_param -n "ldlm.namespaces.*$1*.lock_count" | calc_sum
+}
+
+# Cancel lru locks across all namespaces containing the given wildcard. If the
+# wildcard is omitted, lru locks will be canceled across all namespaces.
+# Usage: cancel_lru_locks [namespace_wildcard]
cancel_lru_locks() {
#$LCTL mark "cancel_lru_locks $1 start"
- $LCTL set_param -n ldlm.namespaces.*$1*.lru_size=clear
- $LCTL get_param ldlm.namespaces.*$1*.lock_unused_count | grep -v '=0'
+ $LCTL set_param -t4 -n "ldlm.namespaces.*$1*.lru_size=clear"
+ $LCTL get_param "ldlm.namespaces.*$1*.lock_unused_count" | grep -v '=0'
#$LCTL mark "cancel_lru_locks $1 stop"
}
default_lru_size()
{
- NR_CPU=$(grep -c "processor" /proc/cpuinfo)
- DEFAULT_LRU_SIZE=$((100 * NR_CPU))
- echo "$DEFAULT_LRU_SIZE"
+ local nr_cpu=$(grep -c "processor" /proc/cpuinfo)
+
+ echo $((100 * nr_cpu))
}
lru_resize_enable()
{
- lctl set_param ldlm.namespaces.*$1*.lru_size=0
+ lctl set_param ldlm.namespaces.*$1*.lru_size=0
}
lru_resize_disable()
{
- lctl set_param ldlm.namespaces.*$1*.lru_size $(default_lru_size)
+ local dev=${1}
+ local lru_size=${2:-$(default_lru_size)}
+
+ $LCTL set_param ldlm.namespaces.*$dev*.lru_size=$lru_size
}
flock_is_enabled()
}
pgcache_empty() {
- local FILE
- for FILE in `lctl get_param -N "llite.*.dump_page_cache"`; do
- if [ `lctl get_param -n $FILE | wc -l` -gt 1 ]; then
- echo there is still data in page cache $FILE ?
- lctl get_param -n $FILE
- return 1
- fi
- done
- return 0
+ local FILE
+
+ for FILE in `lctl get_param -N "llite.*.dump_page_cache"`; do
+ if [ `lctl get_param -n $FILE | wc -l` -gt 1 ]; then
+ echo there is still data in page cache $FILE ?
+ lctl get_param -n $FILE
+ return 1
+ fi
+ done
+ return 0
}
debugsave() {
}
debug_size_save() {
- DEBUG_SIZE_SAVED="$(lctl get_param -n debug_mb)"
+ DEBUG_SIZE_SAVED="$(lctl get_param -n debug_mb)"
}
debug_size_restore() {
- [ -n "$DEBUG_SIZE_SAVED" ] && \
- do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug_mb=$DEBUG_SIZE_SAVED"
- DEBUG_SIZE_SAVED=""
+ [ -n "$DEBUG_SIZE_SAVED" ] &&
+ do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug_mb=$DEBUG_SIZE_SAVED"
+ DEBUG_SIZE_SAVED=""
}
start_full_debug_logging() {
dump=false
fi
- log " ${TESTSUITE} ${TESTNAME}: @@@@@@ ${TYPE}: $@ "
+ log " ${TESTSUITE} ${TESTNAME}: @@@@@@ ${TYPE}: $* "
(print_stack_trace 2) >&2
mkdir -p $LOGDIR
# We need to dump the logs on all nodes
debugrestore
[ "$TESTSUITELOG" ] &&
- echo "$TESTSUITE: $TYPE: $TESTNAME $@" >> $TESTSUITELOG
+ echo "$TESTSUITE: $TYPE: $TESTNAME $*" >> $TESTSUITELOG
if [ -z "$*" ]; then
echo "error() without useful message, please fix" > $LOGDIR/err
else
exit_status () {
local status=0
- local log=$TESTSUITELOG
+ local logs="$TESTSUITELOG $1"
+
+ for log in $logs; do
+ if [ -f "$log" ]; then
+ grep -qw FAIL $log && status=1
+ fi
+ done
- [ -f "$log" ] && grep -qw FAIL $log && status=1
exit $status
}
# environment is not configured properly".
#
skip_env () {
- $FAIL_ON_SKIP_ENV && error false $@ || skip $@
+ $FAIL_ON_SKIP_ENV && error false "$@" || skip "$@"
}
skip_noexit() {
echo
- log " SKIP: $TESTSUITE $TESTNAME $@"
+ log " SKIP: $TESTSUITE $TESTNAME $*"
if [[ -n "$ALWAYS_SKIPPED" ]]; then
skip_logged $TESTNAME "$@"
fi
[[ -n "$TESTSUITELOG" ]] &&
- echo "$TESTSUITE: SKIP: $TESTNAME $@" >> $TESTSUITELOG || true
+ echo "$TESTSUITE: SKIP: $TESTNAME $*" >> $TESTSUITELOG || true
unset TESTNAME
}
skip() {
- skip_noexit $@
+ skip_noexit "$@"
exit 0
}
+#
+# For interop testing treat EOPNOTSUPP as success
+# and skip
+#
+skip_eopnotsupp() {
+ local retstr=$@
+
+ echo $retstr | awk -F'|' '{print $1}' |
+ grep -E unsupported\|"(Operation not supported)"
+ (( $? == 0 )) || error "$retstr"
+ skip $retstr
+}
+
+# Add a list of tests to ALWAYS_EXCEPT due to an issue.
+# Usage: always_except LU-4815 23 42q ...
+#
+function always_except() {
+ local issue="${1:-}" # single jira style issue ("LU-4815")
+ local test_num
+
+ shift
+
+ if ! [[ "$issue" =~ ^[[:upper:]]+-[[:digit:]]+$ ]]; then
+ error "always_except: invalid issue '$issue' for tests '$*'"
+ fi
+
+ for test_num in "$@"; do
+ ALWAYS_EXCEPT+=" $test_num"
+ done
+}
+
build_test_filter() {
EXCEPT="$EXCEPT $(testslist_filter)"
- for O in $ONLY; do
- if [[ $O = [0-9]*-[0-9]* ]]; then
- for num in $(seq $(echo $O | tr '-' ' ')); do
+ # allow test numbers separated by '+', or ',', in addition to ' '
+ # to avoid issues with multiple arguments handling by shell/autotest
+ for O in ${ONLY//[+,]/ }; do
+ if [[ $O =~ [0-9]*-[0-9]* ]]; then
+ for ((num=${O%-[0-9]*}; num <= ${O#[0-9]*-}; num++)); do
eval ONLY_$num=true
done
else
fi
done
- [ "$EXCEPT$ALWAYS_EXCEPT" ] &&
- log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`"
- [ "$EXCEPT_SLOW" ] &&
- log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`"
- for E in $EXCEPT; do
+ [[ -z "$EXCEPT$ALWAYS_EXCEPT" ]] ||
+ log "excepting tests: $(echo $EXCEPT $ALWAYS_EXCEPT)"
+ [[ -z "$EXCEPT_SLOW" ]] ||
+ log "skipping tests SLOW=no: $(echo $EXCEPT_SLOW)"
+ for E in ${EXCEPT//[+,]/ }; do
eval EXCEPT_${E}=true
done
- for E in $ALWAYS_EXCEPT; do
+ for E in ${ALWAYS_EXCEPT//[+,]/ }; do
eval EXCEPT_ALWAYS_${E}=true
done
- for E in $EXCEPT_SLOW; do
+ for E in ${EXCEPT_SLOW//[+,]/ }; do
eval EXCEPT_SLOW_${E}=true
done
- for G in $GRANT_CHECK_LIST; do
+ for G in ${GRANT_CHECK_LIST//[+,]/ }; do
eval GCHECK_ONLY_${G}=true
done
}
basetest() {
- if [[ $1 = [a-z]* ]]; then
- echo $1
- else
- echo ${1%%[a-zA-Z]*}
- fi
+ if [[ $1 = [a-z]* ]]; then
+ echo $1
+ else
+ echo ${1%%[a-zA-Z]*}
+ fi
}
# print a newline if the last test was skipped
local isonly_base=ONLY_$base
if [[ ${!isonly}x != x || ${!isonly_base}x != x ]]; then
- if [[ -n "$ALWAYS_SKIPPED" && -n "$HONOR_EXCEPT" ]]; then
+ if [[ -n "$ALWAYS_SKIPPED" &&
+ -n "$HONOR_EXCEPT" ]]; then
LAST_SKIPPED="y"
skip_noexit "$skip_message"
return 0
echo "$*" >&2
load_module ../libcfs/libcfs/libcfs
- local MSG="$*"
- # Get rid of '
- MSG=${MSG//\'/\\\'}
- MSG=${MSG//\(/\\\(}
- MSG=${MSG//\)/\\\)}
- MSG=${MSG//\;/\\\;}
- MSG=${MSG//\|/\\\|}
- MSG=${MSG//\>/\\\>}
- MSG=${MSG//\</\\\<}
- MSG=${MSG//\//\\\/}
- do_nodes $(comma_list $(nodes_list)) $LCTL mark "$MSG" 2> /dev/null || true
+ local MSG="$*"
+ # Get rid of '
+ MSG=${MSG//\'/\\\'}
+ MSG=${MSG//\*/\\\*}
+ MSG=${MSG//\(/\\\(}
+ MSG=${MSG//\)/\\\)}
+ MSG=${MSG//\;/\\\;}
+ MSG=${MSG//\|/\\\|}
+ MSG=${MSG//\>/\\\>}
+ MSG=${MSG//\</\\\<}
+ MSG=${MSG//\//\\\/}
+ do_nodes $(comma_list $(nodes_list)) $LCTL mark "$MSG" 2> /dev/null || true
}
trace() {
- log "STARTING: $*"
- strace -o $TMP/$1.strace -ttt $*
- RC=$?
- log "FINISHED: $*: rc $RC"
- return 1
+ log "STARTING: $*"
+ strace -o $TMP/$1.strace -ttt $*
+ RC=$?
+ log "FINISHED: $*: rc $RC"
+ return 1
}
-complete () {
- local duration=$1
+complete_test() {
+ local duration=$1
- banner test complete, duration $duration sec
- [ -f "$TESTSUITELOG" ] && egrep .FAIL $TESTSUITELOG || true
- echo duration $duration >>$TESTSUITELOG
+ banner "test complete, duration $duration sec"
+ [ -f "$TESTSUITELOG" ] && egrep .FAIL $TESTSUITELOG || true
+ echo "duration $duration" >>$TESTSUITELOG
}
pass() {
elif [[ -f $LOGDIR/skip ]]; then
TEST_STATUS="SKIP"
fi
- echo "$TEST_STATUS $@" 2>&1 | tee -a $TESTSUITELOG
+ echo "$TEST_STATUS $*" 2>&1 | tee -a $TESTSUITELOG
}
check_mds() {
- local FFREE=$(do_node $SINGLEMDS \
+ local FFREE=$(do_node $SINGLEMDS \
lctl get_param -n osd*.*MDT*.filesfree | calc_sum)
- local FTOTAL=$(do_node $SINGLEMDS \
+ local FTOTAL=$(do_node $SINGLEMDS \
lctl get_param -n osd*.*MDT*.filestotal | calc_sum)
- [ $FFREE -ge $FTOTAL ] && error "files free $FFREE > total $FTOTAL" || true
+ [ $FFREE -ge $FTOTAL ] && error "files free $FFREE > total $FTOTAL" ||
+ true
}
reset_fail_loc () {
- echo -n "Resetting fail_loc on all nodes..."
- do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \
- fail_val=0 2>/dev/null" || true
- echo done.
+ #echo -n "Resetting fail_loc on all nodes..."
+ do_nodes --quiet $(comma_list $(nodes_list)) \
+ "lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true
+ #echo done.
}
# Also appends a timestamp and prepends the testsuite name.
#
-EQUALS="===================================================================================================="
+# ======================================================== 15:06:12 (1624050372)
+EQUALS="========================================================"
banner() {
- msg="== ${TESTSUITE} $*"
- last=${msg: -1:1}
- [[ $last != "=" && $last != " " ]] && msg="$msg "
- msg=$(printf '%s%.*s' "$msg" $((${#EQUALS} - ${#msg})) $EQUALS )
- # always include at least == after the message
- log "$msg== $(date +"%H:%M:%S (%s)")"
+ msg="== ${TESTSUITE} $*"
+ last=${msg: -1:1}
+ [[ $last != "=" && $last != " " ]] && msg="$msg "
+ msg=$(printf '%s%.*s' "$msg" $((${#EQUALS} - ${#msg})) $EQUALS )
+ # always include at least == after the message
+ log "$msg== $(date +"%H:%M:%S (%s)")"
}
check_dmesg_for_errors() {
ldiskfs_check_descriptors: Checksum for group 0 failed\|\
group descriptors corrupted"
- res=$(do_nodes $(comma_list $(nodes_list)) "dmesg" | grep "$errors")
+ res=$(do_nodes -q $(comma_list $(nodes_list)) "dmesg" | grep "$errors")
[ -z "$res" ] && return 0
echo "Kernel error detected: $res"
return 1
local testmsg=$2
export tfile=f${testnum}.${TESTSUITE}
export tdir=d${testnum}.${TESTSUITE}
- local name=$TESTSUITE.$TESTNAME.test_log.$(hostname -s).log
- local test_log=$LOGDIR/$name
- local zfs_log_name=$TESTSUITE.$TESTNAME.zfs_log
- local zfs_debug_log=$LOGDIR/$zfs_log_name
+ local test_log=$TESTLOG_PREFIX.$TESTNAME.test_log.$(hostname -s).log
+ local zfs_debug_log=$TESTLOG_PREFIX.$TESTNAME.zfs_log
local SAVE_UMASK=$(umask)
local rc=0
+ local node
+ declare -A kptr_restrict
+ declare -A debug_raw
umask 0022
+ for node in $(all_nodes); do
+ kptr_restrict[$node]=$(do_node $node "sysctl --values kernel/kptr_restrict")
+ do_node $node "sysctl -wq kernel/kptr_restrict=1"
+ # Enable %p to be unhashed (if supported)
+ debug_raw[$node]=$(do_node $node "$LCTL get_param -n debug_raw_pointers" || echo 0)
+ do_node $node "$LCTL set_param debug_raw_pointers=Y || true"
+ done
+
rm -f $LOGDIR/err $LOGDIR/ignore $LOGDIR/skip
echo
- # if ${ONLY_$testnum} set, repeat $ONLY_REPEAT times, otherwise once
- local isonly=ONLY_$testnum
- local repeat=${!isonly:+$ONLY_REPEAT}
+ # if $ONLY is set, repeat subtest $ONLY_REPEAT times, otherwise once
+ local repeat=${ONLY:+$ONLY_REPEAT}
- for testiter in $(seq ${repeat:-1}); do
+ for ((testiter=0; testiter < ${repeat:-1}; testiter++)); do
local before_sub=$SECONDS
- log_sub_test_begin $TESTNAME
+ log_sub_test_begin $TESTNAME
# remove temp files between repetitions to avoid test failures
- [ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] &&
- rm -rf $DIR/$tdir* $DIR/$tfile*
+ if [[ -n "$append" ]]; then
+ [[ -n "$tdir" ]] && rm -rvf $DIR/$tdir*
+ [[ -n "$tfile" ]] && rm -vf $DIR/$tfile*
+ echo "subtest iteration $testiter/$repeat"
+ fi
# loop around subshell so stack_trap EXIT triggers each time
(run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log
rc=${PIPESTATUS[0]}
fi
pass "$testnum" "(${duration_sub}s)"
+ if [ -n "${DUMP_OK}" ]; then
+ gather_logs $(comma_list $(nodes_list))
+ fi
+
log_sub_test_end $TEST_STATUS $duration_sub "$rc" "$test_error"
- [[ $rc != 0 ]] && break
+ [[ $rc != 0 || "$TEST_STATUS" != "PASS" ]] && break
+ done
+
+ local param
+ for node in $(all_nodes); do
+ param="kernel/kptr_restrict=${kptr_restrict[$node]}"
+ do_node $node "sysctl -wq ${param} || true"
+ # Restore %p to initial state
+ param="debug_raw_pointers=${debug_raw[$node]}"
+ do_node $node "$LCTL set_param ${param} || true"
done
if [[ "$TEST_STATUS" != "SKIP" && -f $TF_SKIP ]]; then
log_sub_test_end "SKIP" "0" "0" "$@"
}
-canonical_path() {
- (cd $(dirname $1); echo $PWD/$(basename $1))
-}
-
grant_from_clients() {
local nodes="$1"
# sync all the data and make sure no pending data on server
do_nodes $clients sync
- clients_up # initiate all idling connections
+ do_nodes $clients $LFS df # initiate all idling connections
# get client grant
cli_grant=$(grant_from_clients $clients)
########################
# helper functions
-osc_to_ost()
-{
- osc=$1
- ost=`echo $1 | awk -F_ '{print $3}'`
- if [ -z $ost ]; then
- ost=`echo $1 | sed 's/-osc.*//'`
- fi
- echo $ost
+osc_to_ost() {
+ local osc=$1
+
+ echo ${osc/-osc*/}
}
-ostuuid_from_index()
-{
- $LFS osts $2 | sed -ne "/^$1: /s/.* \(.*\) .*$/\1/p"
+ostuuid_from_index() {
+ # only print the first UUID, if 'lfs osts' shows multiple mountpoints
+ local uuid=($($LFS osts $2 | sed -ne "/^$1: /s/.* \(.*\) .*$/\1/p"))
+
+ echo ${uuid}
}
ostname_from_index() {
- local uuid=$(ostuuid_from_index $1)
- echo ${uuid/_UUID/}
+ local uuid=$(ostuuid_from_index $1 $2)
+
+ echo ${uuid/_UUID/}
+}
+
+mdtuuid_from_index() {
+	# only print the first UUID, if 'lfs mdts' shows multiple mountpoints
+ local uuid=($($LFS mdts $2 | sed -ne "/^$1: /s/.* \(.*\) .*$/\1/p"))
+
+ echo ${uuid}
}
mdtname_from_index() {
- local uuid=$(mdtuuid_from_index $1)
+ local uuid=$(mdtuuid_from_index $1 $2)
+
echo ${uuid/_UUID/}
}
-mdssize_from_index () {
+mdssize_from_index() {
local mdt=$(mdtname_from_index $2)
- $LFS df $1 | grep $mdt | awk '{ print $2 }'
+
+ $LFS df $1 | awk "/$mdt/ { print \$2 }"
}
index_from_ostuuid()
{
- $LFS osts $2 | sed -ne "/${1}/s/\(.*\): .* .*$/\1/p"
-}
+ # only print the first index, if 'lfs osts' shows multiple mountpoints
+ local ostidx=($($LFS osts $2 | sed -ne "/${1}/s/\(.*\): .* .*$/\1/p"))
-mdtuuid_from_index()
-{
- $LFS mdts $2 | sed -ne "/^$1: /s/.* \(.*\) .*$/\1/p"
+ echo ${ostidx}
}
# Description:
# Description:
# Returns list of ip addresses for each interface
local_addr_list() {
- ip addr | awk '/inet\ / {print $2}' | awk -F\/ '{print $1}'
+ ip addr | awk '/inet / {print $2}' | awk -F/ '{print $1}'
+}
+
+# Description:
+# Returns list of interfaces configured for LNet
+lnet_if_list() {
+ local nids=( $($LCTL list_nids | xargs echo) )
+
+ [[ -z ${nids[@]} ]] &&
+ return 0
+
+ if [[ ${NETTYPE} =~ kfi* ]]; then
+ $LNETCTL net show 2>/dev/null | awk '/ cxi[0-9]+$/{print $NF}' |
+ sort -u | xargs echo
+ return 0
+ fi
+
+ declare -a INTERFACES
+
+ for ((i = 0; i < ${#nids[@]}; i++)); do
+ ip=$(sed 's/^\(.*\)@.*$/\1/'<<<${nids[i]})
+ INTERFACES[i]=$(ip -o a s |
+ awk '$4 ~ /^'$ip'\//{print $2}')
+ INTERFACES=($(echo "${INTERFACES[@]}" | tr ' ' '\n' | uniq | tr '\n' ' '))
+ if [[ -z ${INTERFACES[i]} ]]; then
+ error "Can't determine interface name for NID ${nids[i]}"
+ elif [[ 1 -ne $(wc -w <<<${INTERFACES[i]}) ]]; then
+ error "Found $(wc -w <<<${INTERFACES[i]}) interfaces for NID ${nids[i]}. Expect 1"
+ fi
+ done
+
+ echo "${INTERFACES[@]}"
+
+ return 0
}
+# return 1 if addr is remote
+# return 0 if addr is local
is_local_addr() {
local addr=$1
# Cache address list to avoid mutiple execution of local_addr_list
return 1
}
+# return true(0) if host_name is local
+# return false(1) if host_name is remote
local_node() {
local host_name=$1
local is_local="IS_LOCAL_$(host_id $host_name)"
+
if [ -z "${!is_local-}" ] ; then
- eval $is_local=0
- local host_ip=$($LUSTRE/tests/resolveip $host_name)
- is_local_addr "$host_ip" && eval $is_local=1
+ eval $is_local=false
+ local host_ip=$(getent ahostsv4 $host_name |
+ awk 'NR == 1 { print $1 }')
+ is_local_addr "$host_ip" && eval $is_local=true
fi
- [[ "${!is_local}" == "1" ]]
+ ${!is_local}
}
remote_node () {
local node=$1
- local_node $node && return 1
- return 0
+
+ ! local_node $node
}
remote_mds ()
{
- local node
- for node in $(mdts_nodes); do
- remote_node $node && return 0
- done
- return 1
+ local node
+ for node in $(mdts_nodes); do
+ remote_node $node && return 0
+ done
+ return 1
}
remote_mds_nodsh()
remote_ost ()
{
- local node
- for node in $(osts_nodes) ; do
- remote_node $node && return 0
- done
- return 1
+ local node
+ for node in $(osts_nodes) ; do
+ remote_node $node && return 0
+ done
+ return 1
}
remote_ost_nodsh()
require_dsh_ost()
{
- remote_ost_nodsh && echo "SKIP: $TESTSUITE: remote OST with nodsh" && \
- OSKIPPED=1 && return 1
- return 0
+ remote_ost_nodsh && echo "SKIP: $TESTSUITE: remote OST with nodsh" &&
+ OSKIPPED=1 && return 1
+ return 0
}
remote_mgs_nodsh()
local_mode ()
{
- remote_mds_nodsh || remote_ost_nodsh || \
- $(single_local_node $(comma_list $(nodes_list)))
+ remote_mds_nodsh || remote_ost_nodsh ||
+ $(single_local_node $(comma_list $(nodes_list)))
}
remote_servers () {
- remote_ost && remote_mds
+ remote_ost && remote_mds
}
# Get the active nodes for facets.
# Get name of the active MGS node.
mgs_node () {
- echo -n $(facets_nodes $(get_facets MGS))
-}
+ echo -n $(facets_nodes $(get_facets MGS))
+ }
# Get all of the active MDS nodes.
mdts_nodes () {
echo -n $(facets_nodes $(get_facets OST))
}
-# Get all of the active AGT (HSM agent) nodes.
-agts_nodes () {
- echo -n $(facets_nodes $(get_facets AGT))
-}
-
# Get all of the client nodes and active server nodes.
nodes_list () {
local nodes=$HOSTNAME
}
init_clients_lists () {
- # Sanity check: exclude the local client from RCLIENTS
- local clients=$(hostlist_expand "$RCLIENTS")
- local rclients=$(exclude_items_from_list "$clients" $HOSTNAME)
+ # Sanity check: exclude the local client from RCLIENTS
+ local clients=$(hostlist_expand "$RCLIENTS")
+ local rclients=$(exclude_items_from_list "$clients" $HOSTNAME)
- # Sanity check: exclude the dup entries
- RCLIENTS=$(for i in ${rclients//,/ }; do echo $i; done | sort -u)
+ # Sanity check: exclude the dup entries
+ RCLIENTS=$(for i in ${rclients//,/ }; do echo $i; done | sort -u)
- clients="$SINGLECLIENT $HOSTNAME $RCLIENTS"
+ export CLIENT1=${CLIENT1:-$HOSTNAME}
+ export SINGLECLIENT=$CLIENT1
- # Sanity check: exclude the dup entries from CLIENTS
- # for those configs which has SINGLCLIENT set to local client
- clients=$(for i in $clients; do echo $i; done | sort -u)
+ clients="$SINGLECLIENT $HOSTNAME $RCLIENTS"
- CLIENTS=$(comma_list $clients)
- local -a remoteclients=($RCLIENTS)
- for ((i=0; $i<${#remoteclients[@]}; i++)); do
- varname=CLIENT$((i + 2))
- eval $varname=${remoteclients[i]}
- done
+ # Sanity check: exclude the dup entries from CLIENTS
+	# for those configs which have SINGLECLIENT set to local client
+ clients=$(for i in $clients; do echo $i; done | sort -u)
+
+ export CLIENTS=$(comma_list $clients)
+ local -a remoteclients=($RCLIENTS)
+ for ((i=0; $i<${#remoteclients[@]}; i++)); do
+ varname=CLIENT$((i + 2))
+
+ eval export $varname=${remoteclients[i]}
+ done
- CLIENTCOUNT=$((${#remoteclients[@]} + 1))
+ export CLIENTCOUNT=$((${#remoteclients[@]} + 1))
}
get_random_entry () {
- local rnodes=$1
+ local rnodes=$1
- rnodes=${rnodes//,/ }
+ rnodes=${rnodes//,/ }
- local -a nodes=($rnodes)
- local num=${#nodes[@]}
- local i=$((RANDOM * num * 2 / 65536))
+ local -a nodes=($rnodes)
+ local num=${#nodes[@]}
+ local i=$((RANDOM * num * 2 / 65536))
- echo ${nodes[i]}
+ echo ${nodes[i]}
}
client_only () {
}
check_versions () {
- [ "$(lustre_version_code client)" = "$(lustre_version_code $SINGLEMDS)" -a \
- "$(lustre_version_code client)" = "$(lustre_version_code ost1)" ]
+ # this should already have been called, but just in case
+ [[ -n "$CLIENT_VERSION" && -n "$MDS1_VERSION" && -n "$OST1_VERSION" ]]||
+ get_lustre_env
+
+ echo "client=$CLIENT_VERSION MDS=$MDS1_VERSION OSS=$OST1_VERSION"
+
+ [[ -n "$CLIENT_VERSION" && -n "$MDS1_VERSION" && -n "$OST1_VERSION" ]]||
+ error "unable to determine node versions"
+
+ (( "$CLIENT_VERSION" == "$MDS1_VERSION" &&
+ "$CLIENT_VERSION" == "$OST1_VERSION"))
}
get_node_count() {
- local nodes="$@"
- echo $nodes | wc -w || true
-}
+ local nodes="$@"
-mixed_ost_devs () {
- local nodes=$(osts_nodes)
- local osscount=$(get_node_count "$nodes")
- [ ! "$OSTCOUNT" = "$osscount" ]
+ echo $nodes | wc -w || true
}
mixed_mdt_devs () {
- local nodes=$(mdts_nodes)
- local mdtcount=$(get_node_count "$nodes")
- [ ! "$MDSCOUNT" = "$mdtcount" ]
+ local nodes=$(mdts_nodes)
+ local mdtcount=$(get_node_count "$nodes")
+
+ [ ! "$MDSCOUNT" = "$mdtcount" ]
}
generate_machine_file() {
- local nodes=${1//,/ }
- local machinefile=$2
- rm -f $machinefile
- for node in $nodes; do
- echo $node >>$machinefile || \
- { echo "can not generate machinefile $machinefile" && return 1; }
- done
+ local nodes=${1//,/ }
+ local machinefile=$2
+
+ rm -f $machinefile
+ for node in $nodes; do
+ echo $node >>$machinefile ||
+ { echo "can not generate machinefile $machinefile" &&
+ return 1; }
+ done
}
get_stripe () {
rm -f $file
}
-setstripe_nfsserver () {
- local dir=$1
- local nfsexportdir=$2
- shift
- shift
-
- local -a nfsexport=($(awk '"'$dir'" ~ $2 && $3 ~ "nfs" && $2 != "/" \
- { print $1 }' /proc/mounts | cut -f 1 -d :))
-
- # check that only one nfs mounted
- [[ -z $nfsexport ]] && echo "$dir is not nfs mounted" && return 1
- (( ${#nfsexport[@]} == 1 )) ||
- error "several nfs mounts found for $dir: ${nfsexport[@]} !"
-
- do_nodev ${nfsexport[0]} lfs setstripe $nfsexportdir "$@"
-}
-
# Check and add a test group.
add_group() {
local group_id=$1
rc=1
}
else
+ echo "adding group $group_name:$group_id"
+ getent group $group_name || true
+ getent group $group_id || true
groupadd -g $group_id $group_name
rc=${PIPESTATUS[0]}
fi
}
check_runas_id_ret() {
- local myRC=0
- local myRUNAS_UID=$1
- local myRUNAS_GID=$2
- shift 2
- local myRUNAS=$@
- if [ -z "$myRUNAS" ]; then
- error_exit "myRUNAS command must be specified for check_runas_id"
- fi
- if $GSS_KRB5; then
- $myRUNAS krb5_login.sh || \
- error "Failed to refresh Kerberos V5 TGT for UID $myRUNAS_ID."
- fi
- mkdir $DIR/d0_runas_test
- chmod 0755 $DIR
- chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test
- $myRUNAS touch $DIR/d0_runas_test/f$$ || myRC=$?
- rm -rf $DIR/d0_runas_test
- return $myRC
+ local myRC=0
+ local myRUNAS_UID=$1
+ local myRUNAS_GID=$2
+ shift 2
+ local myRUNAS=$@
+
+ if [ -z "$myRUNAS" ]; then
+ error_exit "check_runas_id_ret requires myRUNAS argument"
+ fi
+
+ $myRUNAS true ||
+ error "Unable to execute $myRUNAS"
+
+ id $myRUNAS_UID > /dev/null ||
+ error "Invalid RUNAS_ID $myRUNAS_UID. Please set RUNAS_ID to " \
+ "some UID which exists on MDS and client or add user " \
+ "$myRUNAS_UID:$myRUNAS_GID on these nodes."
+
+ if $GSS_KRB5; then
+ $myRUNAS krb5_login.sh ||
+ error "Failed to refresh krb5 TGT for UID $myRUNAS_ID."
+ fi
+ mkdir $DIR/d0_runas_test
+ chmod 0755 $DIR
+ chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test
+ $myRUNAS touch $DIR/d0_runas_test/f$$ || myRC=$?
+ rm -rf $DIR/d0_runas_test
+ return $myRC
}
check_runas_id() {
- local myRUNAS_UID=$1
- local myRUNAS_GID=$2
- shift 2
- local myRUNAS=$@
- check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \
- error "unable to write to $DIR/d0_runas_test as UID $myRUNAS_UID.
- Please set RUNAS_ID to some UID which exists on MDS and client or
- add user $myRUNAS_UID:$myRUNAS_GID on these nodes."
+ local myRUNAS_UID=$1
+ local myRUNAS_GID=$2
+ shift 2
+ local myRUNAS=$@
+
+ check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \
+ error "unable to write to $DIR/d0_runas_test as " \
+ "UID $myRUNAS_UID."
}
# obtain the UID/GID for MPI_USER
get_mpiuser_id() {
- local mpi_user=$1
+ local mpi_user=$1
- MPI_USER_UID=$(do_facet client "getent passwd $mpi_user | cut -d: -f3;
+ MPI_USER_UID=$(do_facet client "getent passwd $mpi_user | cut -d: -f3;
exit \\\${PIPESTATUS[0]}") || error_exit "failed to get the UID for $mpi_user"
- MPI_USER_GID=$(do_facet client "getent passwd $mpi_user | cut -d: -f4;
+ MPI_USER_GID=$(do_facet client "getent passwd $mpi_user | cut -d: -f4;
exit \\\${PIPESTATUS[0]}") || error_exit "failed to get the GID for $mpi_user"
}
-# obtain and cache Kerberos ticket-granting ticket
-refresh_krb5_tgt() {
- local myRUNAS_UID=$1
- local myRUNAS_GID=$2
- shift 2
- local myRUNAS=$@
- if [ -z "$myRUNAS" ]; then
- error_exit "myRUNAS command must be specified for refresh_krb5_tgt"
- fi
-
- CLIENTS=${CLIENTS:-$HOSTNAME}
- do_nodes $CLIENTS "set -x
-if ! $myRUNAS krb5_login.sh; then
- echo "Failed to refresh Krb5 TGT for UID/GID $myRUNAS_UID/$myRUNAS_GID."
- exit 1
-fi"
-}
-
# Run multiop in the background, but wait for it to print
# "PAUSING" to its stdout before returning from this function.
multiop_bg_pause() {
- MULTIOP_PROG=${MULTIOP_PROG:-$MULTIOP}
- FILE=$1
- ARGS=$2
+ MULTIOP_PROG=${MULTIOP_PROG:-$MULTIOP}
+ FILE=$1
+ ARGS=$2
- TMPPIPE=/tmp/multiop_open_wait_pipe.$$
- mkfifo $TMPPIPE
+ TMPPIPE=/tmp/multiop_open_wait_pipe.$$
+ mkfifo $TMPPIPE
- echo "$MULTIOP_PROG $FILE v$ARGS"
- $MULTIOP_PROG $FILE v$ARGS > $TMPPIPE &
+ echo "$MULTIOP_PROG $FILE v$ARGS"
+ $MULTIOP_PROG $FILE v$ARGS > $TMPPIPE &
+ local pid=$!
- echo "TMPPIPE=${TMPPIPE}"
- read -t 60 multiop_output < $TMPPIPE
- if [ $? -ne 0 ]; then
- rm -f $TMPPIPE
- return 1
- fi
- rm -f $TMPPIPE
- if [ "$multiop_output" != "PAUSING" ]; then
- echo "Incorrect multiop output: $multiop_output"
- kill -9 $PID
- return 1
- fi
+ echo "TMPPIPE=${TMPPIPE}"
+ read -t 60 multiop_output < $TMPPIPE
+ if [ $? -ne 0 ]; then
+ rm -f $TMPPIPE
+ return 1
+ fi
+ rm -f $TMPPIPE
+ if [ "$multiop_output" != "PAUSING" ]; then
+ echo "Incorrect multiop output: $multiop_output"
+ kill -9 $pid
+ return 1
+ fi
- return 0
+ return 0
}
do_and_time () {
inodes_available () {
local IFree=$($LFS df -i $MOUNT | grep ^$FSNAME | awk '{ print $4 }' |
sort -un | head -n1) || return 1
+
echo $((IFree))
}
mdsrate_inodes_available () {
local min_inodes=$(inodes_available)
+
echo $((min_inodes * 99 / 100))
}
+bytes_available () {
+ echo $(df -P -B 1 "$MOUNT" | awk 'END {print $4}')
+}
+
+mdsrate_bytes_available () {
+ local bytes=$(bytes_available)
+
+ echo $((bytes * 99 / 100))
+}
+
# reset stat counters
clear_stats() {
local paramfile="$1"
+
lctl set_param -n $paramfile=0
}
calc_stats() {
local paramfile="$1"
local stat="$2"
+
lctl get_param -n $paramfile |
awk '/^'$stat'/ { sum += $2 } END { printf("%0.0f", sum) }'
}
$LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum
}
+free_min_max () {
+ wait_delete_completed
+ AVAIL=($(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail))
+ echo "OST kbytes available: ${AVAIL[*]}"
+ MAXV=${AVAIL[0]}
+ MAXI=0
+ MINV=${AVAIL[0]}
+ MINI=0
+ for ((i = 0; i < ${#AVAIL[@]}; i++)); do
+ #echo OST $i: ${AVAIL[i]}kb
+ if [[ ${AVAIL[i]} -gt $MAXV ]]; then
+ MAXV=${AVAIL[i]}
+ MAXI=$i
+ fi
+ if [[ ${AVAIL[i]} -lt $MINV ]]; then
+ MINV=${AVAIL[i]}
+ MINI=$i
+ fi
+ done
+ echo "Min free space: OST $MINI: $MINV"
+ echo "Max free space: OST $MAXI: $MAXV"
+}
+
# save_lustre_params(comma separated facet list, parameter_mask)
# generate a stream of formatted strings (<facet> <param name>=<param value>)
save_lustre_params() {
check_node_health() {
local nodes=${1:-$(comma_list $(nodes_list))}
-
- for node in ${nodes//,/ }; do
- check_network "$node" 5
- if [ $? -eq 0 ]; then
- do_node $node "$LCTL get_param catastrophe 2>&1" |
- grep -q "catastrophe=1" &&
- error "$node:LBUG/LASSERT detected" || true
- fi
- done
+ local health=$TMP/node_health.$$
+
+ do_nodes -q $nodes "$LCTL get_param catastrophe 2>&1" | tee $health |
+ grep "catastrophe=1" && error "LBUG/LASSERT detected"
+	# Only check/report network health if get_param didn't respond from
+	# every node, since *clearly* the network is working on any node that
+	# returned something.
+ if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then
+ for node in ${nodes//,/ }; do
+ check_network $node 5
+ done
+ fi
+ rm -f $health
}
mdsrate_cleanup () {
fi
}
-delayed_recovery_enabled () {
- local var=${SINGLEMDS}_svc
- do_facet $SINGLEMDS lctl get_param -n mdd.${!var}.stale_export_age > /dev/null 2>&1
+run_mdtest () {
+ local test_type="$1"
+ local file_size=0
+ local num_files=0
+ local num_cores=0
+ local num_procs=0
+ local num_hosts=0
+ local free_space=0
+ local num_inodes=0
+ local num_entries=0
+ local num_dirs=0
+ local np=0
+ local rc=0
+
+ local mdtest_basedir
+ local mdtest_actions
+ local mdtest_options
+ local stripe_options
+ local params_file
+
+ case "$test_type" in
+ create-small)
+ stripe_options=(-c 1 -i 0)
+ mdtest_actions=(-F -R)
+ file_size=1024
+ num_files=100000
+ ;;
+ create-large)
+ stripe_options=(-c -1)
+ mdtest_actions=(-F -R)
+ file_size=$((1024 * 1024 * 1024))
+ num_files=16
+ ;;
+ lookup-single)
+ stripe_options=(-c 1)
+ mdtest_actions=(-C -D -E -k -r)
+ num_dirs=1
+ num_files=100000
+ ;;
+ lookup-multi)
+ stripe_options=(-c 1)
+ mdtest_actions=(-C -D -E -k -r)
+ num_dirs=100
+ num_files=1000
+ ;;
+ *)
+ stripe_options=(-c -1)
+ mdtest_actions=()
+ num_files=100000
+ ;;
+ esac
+
+ if [[ -n "$MDTEST_DEBUG" ]]; then
+ mdtest_options+=(-v -v -v)
+ fi
+
+ num_dirs=${NUM_DIRS:-$num_dirs}
+ num_files=${NUM_FILES:-$num_files}
+ file_size=${FILE_SIZE:-$file_size}
+ free_space=$(mdsrate_bytes_available)
+
+ if (( file_size * num_files > free_space )); then
+ file_size=$((free_space / num_files))
+ log "change file size to $file_size due to" \
+ "number of files $num_files and" \
+ "free space limit in $free_space"
+ fi
+
+ if (( file_size > 0 )); then
+ log "set file size to $file_size"
+ mdtest_options+=(-w=$file_size)
+ fi
+
+ params_file=$TMP/$TESTSUITE-$TESTNAME.parameters
+ mdtest_basedir=$MOUNT/mdtest
+ mdtest_options+=(-d=$mdtest_basedir)
+
+ num_cores=$(nproc)
+ num_hosts=$(get_node_count ${CLIENTS//,/ })
+ num_procs=$((num_cores * num_hosts))
+ num_inodes=$(mdsrate_inodes_available)
+
+ if (( num_inodes < num_files )); then
+ log "change the number of files $num_files to the" \
+ "number of available inodes $num_inodes"
+ num_files=$num_inodes
+ fi
+
+ if (( num_dirs > 1 )); then
+ num_entries=$((num_files / num_dirs))
+ log "split $num_files files to $num_dirs" \
+ "with $num_entries files each"
+ mdtest_options+=(-I=$num_entries)
+ fi
+
+ generate_machine_file $CLIENTS $MACHINEFILE ||
+ error "can not generate machinefile"
+
+ install -v -d -m 0777 $mdtest_basedir
+
+ setstripe_getstripe $mdtest_basedir ${stripe_options[@]}
+
+ save_lustre_params $(get_facets MDS) \
+ mdt.*.enable_remote_dir_gid > $params_file
+
+ do_nodes $(comma_list $(mdts_nodes)) \
+ $LCTL set_param mdt.*.enable_remote_dir_gid=-1
+
+ stack_trap "restore_lustre_params < $params_file" EXIT
+
+ for np in 1 $num_procs; do
+ num_entries=$((num_files / np ))
+
+ mpi_run $MACHINEFILE_OPTION $MACHINEFILE \
+ -np $np -npernode $num_cores $MDTEST \
+ ${mdtest_options[@]} -n=$num_entries \
+ ${mdtest_actions[@]} 2>&1 | tee -a "$LOG"
+
+ rc=${PIPESTATUS[0]}
+
+ if (( rc != 0 )); then
+ mpi_run $MACHINEFILE_OPTION $MACHINEFILE \
+ -np $np -npernode $num_cores $MDTEST \
+ ${mdtest_options[@]} -n=$num_entries \
+ -r 2>&1 | tee -a "$LOG"
+ break
+ fi
+ done
+
+ rmdir -v $mdtest_basedir
+ rm -v $state $MACHINEFILE
+
+ return $rc
}
########################
local varsvc=${facet}_svc
- if [ -n ${!varsvc} ]; then
+ if [ -n "${!varsvc}" ]; then
echo ${!varsvc}
else
error "No label for $facet!"
echo "${1}-osc-[-0-9a-f]*"
}
-# If the 2.0 MDS was mounted on 1.8 device, then the OSC and LOV names
-# used by MDT would not be changed.
-# mdt lov: fsname-mdtlov
-# mdt osc: fsname-OSTXXXX-osc
-mds_on_old_device() {
- local mds=${1:-"$SINGLEMDS"}
-
- if [ $(lustre_version_code $mds) -gt $(version_code 1.9.0) ]; then
- do_facet $mds "lctl list_param osc.$FSNAME-OST*-osc \
- > /dev/null 2>&1" && return 0
- fi
- return 1
-}
-
get_mdtosc_proc_path() {
local mds_facet=$1
local ost_label=${2:-"*OST*"}
}
get_osc_import_name() {
- local facet=$1
- local ost=$2
- local label=$(convert_facet2label $ost)
+ local facet=$1
+ local ost=$2
+ local label=$(convert_facet2label $ost)
- if [ "${facet:0:3}" = "mds" ]; then
- get_mdtosc_proc_path $facet $label
- return 0
- fi
+ if [ "${facet:0:3}" = "mds" ]; then
+ get_mdtosc_proc_path $facet $label
+ return 0
+ fi
- get_clientosc_proc_path $label
- return 0
+ get_clientosc_proc_path $label
+ return 0
}
_wait_import_state () {
- local expected=$1
- local CONN_PROC=$2
- local maxtime=${3:-$(max_recovery_time)}
- local error_on_failure=${4:-1}
- local CONN_STATE
- local i=0
+ local expected="$1"
+ local CONN_PROC="$2"
+ local maxtime=${3:-$(max_recovery_time)}
+ local err_on_fail=${4:-1}
+ local CONN_STATE
+ local i=0
CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq)
- while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do
- if [ "${expected}" == "DISCONN" ]; then
- # for disconn we can check after proc entry is removed
- [ "x${CONN_STATE}" == "x" ] && return 0
- # with AT enabled, we can have connect request timeout near of
- # reconnect timeout and test can't see real disconnect
- [ "${CONN_STATE}" == "CONNECTING" ] && return 0
- fi
- if [ $i -ge $maxtime ]; then
- [ $error_on_failure -ne 0 ] && \
- error "can't put import for $CONN_PROC into ${expected}" \
- "state after $i sec, have ${CONN_STATE}"
- return 1
- fi
- sleep 1
- # Add uniq for multi-mount case
- CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq)
- i=$(($i + 1))
- done
+ while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do
+ if [[ "${expected}" == "DISCONN" ]]; then
+ # for disconn we can check after proc entry is removed
+ [[ -z "${CONN_STATE}" ]] && return 0
+ # with AT, we can have connect request timeout near
+ # reconnect timeout and test can't see real disconnect
+ [[ "${CONN_STATE}" == "CONNECTING" ]] && return 0
+ fi
+ if (( $i >= $maxtime )); then
+ (( $err_on_fail != 0 )) &&
+ error "can't put import for $CONN_PROC into ${expected} state after $i sec, have ${CONN_STATE}"
+ return 1
+ fi
+ sleep 1
+ # Add uniq for multi-mount case
+ CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null |
+ cut -f2 | uniq)
+ i=$((i + 1))
+ done
- log "$CONN_PROC in ${CONN_STATE} state after $i sec"
- return 0
+ log "$CONN_PROC in ${CONN_STATE} state after $i sec"
+ return 0
}
wait_import_state() {
- local state=$1
- local params=$2
- local maxtime=${3:-$(max_recovery_time)}
- local error_on_failure=${4:-1}
- local param
-
- for param in ${params//,/ }; do
- _wait_import_state $state $param $maxtime $error_on_failure || return
- done
+ local expected="$1"
+ local params="$2"
+ local maxtime=${3:-$(max_recovery_time)}
+ local err_on_fail=${4:-1}
+ local param
+
+ for param in ${params//,/ }; do
+ _wait_import_state "$expected" "$param" $maxtime $err_on_fail ||
+ return
+ done
}
wait_import_state_mount() {
return 0
fi
- wait_import_state $*
+ wait_import_state "$@"
}
# One client request could be timed out because server was not ready
# #define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
request_timeout () {
- local facet=$1
+ local facet=$1
- # request->rq_timeout = INITIAL_CONNECT_TIMEOUT
- local init_connect_timeout=$TIMEOUT
- [[ $init_connect_timeout -ge 5 ]] || init_connect_timeout=5
+ # request->rq_timeout = INITIAL_CONNECT_TIMEOUT
+ local init_connect_timeout=$TIMEOUT
+ [[ $init_connect_timeout -ge 5 ]] || init_connect_timeout=5
- local at_min=$(at_get $facet at_min)
+ local at_min=$(at_get $facet at_min)
- echo $(( init_connect_timeout + at_min ))
+ echo $(( init_connect_timeout + at_min ))
}
_wait_osc_import_state() {
params=$param
fi
+ local plist=$(comma_list $params)
if ! do_rpc_nodes "$(facet_active_host $facet)" \
- wait_import_state $expected "$params" $maxtime; then
+ wait_import_state $expected $plist $maxtime; then
error "$facet: import is not in $expected state after $maxtime"
return 1
fi
params=$($LCTL list_param $param 2>/dev/null || true)
done
fi
+ local plist=$(comma_list $params)
if ! do_rpc_nodes "$(facet_active_host $facet)" \
- wait_import_state $expected "$params" $maxtime \
+ wait_import_state $expected $plist $maxtime \
$error_on_failure; then
if [ $error_on_failure -ne 0 ]; then
error "import is not in ${expected} state"
$error_on_failure || return
done
else
- _wait_mgc_import_state "$facet" "$expected"
+ _wait_mgc_import_state "$facet" "$expected" \
$error_on_failure || return
fi
}
}
do_rpc_nodes () {
+ local quiet
+
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
local list=$1
shift
local LIBPATH="/usr/lib/lustre/tests:/usr/lib64/lustre/tests:"
local TESTPATH="$RLUSTRE/tests:"
local RPATH="PATH=${TESTPATH}${LIBPATH}${PATH}:/sbin:/bin:/usr/sbin:"
- do_nodesv $list "${RPATH} NAME=${NAME} bash rpc.sh $@ "
+ do_nodes ${quiet:-"--verbose"} $list "${RPATH} NAME=${NAME} \
+ TESTLOG_PREFIX=$TESTLOG_PREFIX TESTNAME=$TESTNAME \
+ bash rpc.sh $* "
}
wait_clients_import_state () {
- local list=$1
- local facet=$2
- local expected=$3
-
- local facets=$facet
+ local list="$1"
+ local facet="$2"
+ local expected="$3"
+ local facets="$facet"
if [ "$FAILURE_MODE" = HARD ]; then
- facets=$(facets_on_host $(facet_active_host $facet))
+ facets=$(for f in ${facet//,/ }; do
+ facets_on_host $(facet_active_host $f) | tr "," "\n"
+ done | sort -u | paste -sd , )
fi
for facet in ${facets//,/ }; do
local proc_path
case $facet in
ost* ) proc_path="osc.$(get_clientosc_proc_path \
- $label).ost_server_uuid" ;;
+ $label).ost_server_uuid" ;;
mds* ) proc_path="mdc.$(get_clientmdc_proc_path \
- $label).mds_server_uuid" ;;
+ $label).mds_server_uuid" ;;
mgs* ) proc_path="mgc.$(get_clientmgc_proc_path \
- $label).mgs_server_uuid" ;;
+ $label).mgs_server_uuid" ;;
*) error "unknown facet!" ;;
esac
fi
}
+wait_clients_import_ready() {
+ wait_clients_import_state "$1" "$2" "\(FULL\|IDLE\)"
+}
+
wait_osp_active() {
local facet=$1
local tgt_name=$2
fi
echo "check $mproc"
- while [ 1 ]; do
+ while true; do
sleep 5
local result=$(do_facet mds${num} "$LCTL get_param -n $mproc")
local max=30
create_pool() {
local fsname=${1%%.*}
local poolname=${1##$fsname.}
+ local keep_pools=${2:-false}
stack_trap "destroy_test_pools $fsname" EXIT
do_facet mgs lctl pool_new $1
wait_update $HOSTNAME "lctl get_param -n lov.$fsname-*.pools.$poolname \
2>/dev/null || echo foo" "" || error "pool_new failed $1"
- add_pool_to_list $1
+ $keep_pools || add_pool_to_list $1
return $RC
}
local poolname=${1##$fsname.}
local listvar=${fsname}_CREATED_POOLS
- local temp=${listvar}=$(exclude_items_from_list ${!listvar} $poolname)
+ local temp=${listvar}=$(exclude_items_from_list "${!listvar}" $poolname)
eval export $temp
}
+# cleanup all pools that exist on $FSNAME
+destroy_all_pools () {
+ local i
+ for i in $(list_pool $FSNAME); do
+ destroy_pool $i
+ done
+}
+
destroy_pool_int() {
local ost
local OSTS=$(list_pool $1)
local RC
- check_pool_not_exist $fsname.$poolname
- [[ $? -eq 0 ]] && return 0
+ check_pool_not_exist $fsname.$poolname && return 0 || true
destroy_pool_int $fsname.$poolname
RC=$?
}
destroy_test_pools () {
- trap 0
local fsname=${1:-$FSNAME}
destroy_pools $fsname || true
}
$LCTL dk > ${prefix}.debug_log.$(hostname -s).${suffix}
dmesg > ${prefix}.dmesg.$(hostname -s).${suffix}
[ "$SHARED_KEY" = true ] && find $SK_PATH -name '*.key' -exec \
- lgss_sk -r {} \; &> \
+ $LGSS_SK -r {} \; &> \
${prefix}.ssk_keys.$(hostname -s).${suffix}
[ "$SHARED_KEY" = true ] && lctl get_param 'nodemap.*.*' > \
${prefix}.nodemaps.$(hostname -s).${suffix}
- [ "$GSS_SK" = true ] && keyctl show > \
+ [ "$GSS" = true ] && keyctl show > \
${prefix}.keyring.$(hostname -s).${suffix}
- [ "$GSS_SK" = true ] && journalctl -a > \
+ [ "$GSS" = true ] && journalctl -a > \
${prefix}.journal.$(hostname -s).${suffix}
return
fi
dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}"
if [ "$SHARED_KEY" = true ]; then
do_nodesv $list "find $SK_PATH -name '*.key' -exec \
- lgss_sk -r {} \; &> \
+ $LGSS_SK -r {} \; &> \
${prefix}.ssk_keys.\\\$(hostname -s).${suffix}"
do_facet mds1 "lctl get_param 'nodemap.*.*' > \
${prefix}.nodemaps.\\\$(hostname -s).${suffix}"
fi
- if [ "$GSS_SK" = true ]; then
+ if [ "$GSS" = true ]; then
do_nodesv $list "keyctl show > \
${prefix}.keyring.\\\$(hostname -s).${suffix}"
do_nodesv $list "journalctl -a > \
fi
if [ ! -f $LOGDIR/shared ]; then
- do_nodes $list rsync -az "${prefix}.*.${suffix}" \
- $HOSTNAME:$LOGDIR
+ local remote_nodes=$(exclude_items_from_list $list $HOSTNAME)
+
+ for node in ${remote_nodes//,/ }; do
+ rsync -az -e ssh $node:${prefix}.'*'.${suffix} $LOGDIR &
+ done
fi
}
do_ls () {
- local mntpt_root=$1
- local num_mntpts=$2
- local dir=$3
- local i
- local cmd
- local pids
- local rc=0
-
- for i in $(seq 0 $num_mntpts); do
- cmd="ls -laf ${mntpt_root}$i/$dir"
- echo + $cmd;
- $cmd > /dev/null &
- pids="$pids $!"
- done
- echo pids=$pids
- for pid in $pids; do
- wait $pid || rc=$?
- done
-
- return $rc
+ local mntpt_root=$1
+ local num_mntpts=$2
+ local dir=$3
+ local i
+ local cmd
+ local pids
+ local rc=0
+
+ for i in $(seq 0 $num_mntpts); do
+ cmd="ls -laf ${mntpt_root}$i/$dir"
+ echo + $cmd;
+ $cmd > /dev/null &
+ pids="$pids $!"
+ done
+ echo pids=$pids
+ for pid in $pids; do
+ wait $pid || rc=$?
+ done
+
+ return $rc
}
# check_and_start_recovery_timer()
initial_connect_timeout=$connection_switch_min ||
initial_connect_timeout=$timeout_20
- reconnect_delay_max=$((connection_switch_max + connection_switch_inc + \
+ reconnect_delay_max=$((connection_switch_max + connection_switch_inc +
initial_connect_timeout))
echo $((2 * reconnect_delay_max))
}
combination()
{
- local M=$1
- local N=$2
- local R=1
-
- if [ $M -lt $N ]; then
- R=0
- else
- N=$((N + 1))
- while [ $N -lt $M ]; do
- R=$((R * N))
- N=$((N + 1))
- done
- fi
+ local M=$1
+ local N=$2
+ local R=1
+
+ if [ $M -lt $N ]; then
+ R=0
+ else
+ N=$((N + 1))
+ while [ $N -lt $M ]; do
+ R=$((R * N))
+ N=$((N + 1))
+ done
+ fi
- echo $R
- return 0
+ echo $R
+ return 0
}
calc_connection_cnt() {
set_rule()
{
- local tgt=$1
- local net=$2
- local dir=$3
- local flavor=$4
- local cmd="$tgt.srpc.flavor"
-
- if [ $net == "any" ]; then
- net="default"
- fi
- cmd="$cmd.$net"
+ local tgt=$1
+ local net=$2
+ local dir=$3
+ local flavor=$4
+ local cmd="$tgt.srpc.flavor"
- if [ $dir != "any" ]; then
- cmd="$cmd.$dir"
- fi
+ if [ $net == "any" ]; then
+ net="default"
+ fi
+ cmd="$cmd.$net"
+
+ if [ $dir != "any" ]; then
+ cmd="$cmd.$dir"
+ fi
- cmd="$cmd=$flavor"
- log "Setting sptlrpc rule: $cmd"
- do_facet mgs "$LCTL conf_param $cmd"
+ cmd="$cmd=$flavor"
+ log "Setting sptlrpc rule: $cmd"
+ do_facet mgs "$LCTL conf_param $cmd"
}
count_contexts()
count_flvr()
{
- local output=$1
- local flavor=$2
- local count=0
-
- rpc_flvr=`echo $flavor | awk -F - '{ print $1 }'`
- bulkspec=`echo $flavor | awk -F - '{ print $2 }'`
-
- count=`echo "$output" | grep "rpc flavor" | grep $rpc_flvr | wc -l`
-
- if [ "x$bulkspec" != "x" ]; then
- algs=`echo $bulkspec | awk -F : '{ print $2 }'`
-
- if [ "x$algs" != "x" ]; then
- bulk_count=`echo "$output" | grep "bulk flavor" | grep $algs | wc -l`
- else
- bulk=`echo $bulkspec | awk -F : '{ print $1 }'`
- if [ $bulk == "bulkn" ]; then
- bulk_count=`echo "$output" | grep "bulk flavor" \
- | grep "null/null" | wc -l`
- elif [ $bulk == "bulki" ]; then
- bulk_count=`echo "$output" | grep "bulk flavor" \
- | grep "/null" | grep -v "null/" | wc -l`
- else
- bulk_count=`echo "$output" | grep "bulk flavor" \
- | grep -v "/null" | grep -v "null/" | wc -l`
- fi
- fi
-
- [ $bulk_count -lt $count ] && count=$bulk_count
- fi
+ local output=$1
+ local flavor=$2
+ local count=0
+
+ rpc_flvr=`echo $flavor | awk -F - '{ print $1 }'`
+ bulkspec=`echo $flavor | awk -F - '{ print $2 }'`
+
+ count=`echo "$output" | grep "rpc flavor" | grep $rpc_flvr | wc -l`
+
+ if [ "x$bulkspec" != "x" ]; then
+ algs=`echo $bulkspec | awk -F : '{ print $2 }'`
+
+ if [ "x$algs" != "x" ]; then
+ bulk_count=`echo "$output" | grep "bulk flavor" |
+ grep $algs | wc -l`
+ else
+ bulk=`echo $bulkspec | awk -F : '{ print $1 }'`
+
+ if [ $bulk == "bulkn" ]; then
+ bulk_count=`echo "$output" |
+ grep "bulk flavor" | grep "null/null" |
+ wc -l`
+ elif [ $bulk == "bulki" ]; then
+ bulk_count=`echo "$output" |
+ grep "bulk flavor" | grep "/null" |
+ grep -v "null/" | wc -l`
+ else
+ bulk_count=`echo "$output" |
+ grep "bulk flavor" | grep -v "/null" |
+ grep -v "null/" | wc -l`
+ fi
+ fi
+ [ $bulk_count -lt $count ] && count=$bulk_count
+ fi
- echo $count
+ echo $count
}
flvr_cnt_cli2mdt()
{
- local flavor=$1
- local cnt
+ local flavor=$1
+ local cnt
- local clients=${CLIENTS:-$HOSTNAME}
+ local clients=${CLIENTS:-$HOSTNAME}
- for c in ${clients//,/ }; do
- local output=$(do_node $c lctl get_param -n \
- mdc.*-*-mdc-*.$PROC_CLI 2>/dev/null)
- local tmpcnt=$(count_flvr "$output" $flavor)
- if $GSS_SK && [ $flavor != "null" ]; then
- # tmpcnt=min(contexts,flavors) to ensure SK context is on
- output=$(do_node $c lctl get_param -n \
- mdc.*-MDT*-mdc-*.$PROC_CON 2>/dev/null)
- local outcon=$(count_contexts "$output")
- if [ "$outcon" -lt "$tmpcnt" ]; then
- tmpcnt=$outcon
+ for c in ${clients//,/ }; do
+ local output=$(do_node $c lctl get_param -n \
+ mdc.*-*-mdc-*.$PROC_CLI 2>/dev/null)
+ local tmpcnt=$(count_flvr "$output" $flavor)
+
+ if $GSS_SK && [ $flavor != "null" ]; then
+			# tmpcnt=min(contexts,flavors) to ensure
+			# SK context is on
+ output=$(do_node $c lctl get_param -n \
+ mdc.*-MDT*-mdc-*.$PROC_CON 2>/dev/null)
+ local outcon=$(count_contexts "$output")
+
+ if [ "$outcon" -lt "$tmpcnt" ]; then
+ tmpcnt=$outcon
+ fi
fi
- fi
- cnt=$((cnt + tmpcnt))
- done
- echo $cnt
+ cnt=$((cnt + tmpcnt))
+ done
+ echo $cnt
+}
+
+flvr_dump_cli2mdt()
+{
+ local clients=${CLIENTS:-$HOSTNAME}
+
+ for c in ${clients//,/ }; do
+ do_node $c lctl get_param \
+ mdc.*-*-mdc-*.$PROC_CLI 2>/dev/null
+
+ if $GSS_SK; then
+ do_node $c lctl get_param \
+ mdc.*-MDT*-mdc-*.$PROC_CON 2>/dev/null
+ fi
+ done
}
flvr_cnt_cli2ost()
{
- local flavor=$1
- local cnt
+ local flavor=$1
+ local cnt
- local clients=${CLIENTS:-$HOSTNAME}
+ local clients=${CLIENTS:-$HOSTNAME}
- for c in ${clients//,/ }; do
- # reconnect if idle
- do_node $c lctl set_param osc.*.idle_connect=1 >/dev/null 2>&1
- local output=$(do_node $c lctl get_param -n \
- osc.*OST*-osc-[^M][^D][^T]*.$PROC_CLI 2>/dev/null)
- local tmpcnt=$(count_flvr "$output" $flavor)
- if $GSS_SK && [ $flavor != "null" ]; then
- # tmpcnt=min(contexts,flavors) to ensure SK context is on
- output=$(do_node $c lctl get_param -n \
- osc.*OST*-osc-[^M][^D][^T]*.$PROC_CON 2>/dev/null)
- local outcon=$(count_contexts "$output")
- if [ "$outcon" -lt "$tmpcnt" ]; then
- tmpcnt=$outcon
+ for c in ${clients//,/ }; do
+ # reconnect if idle
+ do_node $c lctl set_param osc.*.idle_connect=1 >/dev/null 2>&1
+ local output=$(do_node $c lctl get_param -n \
+ osc.*OST*-osc-[^M][^D][^T]*.$PROC_CLI 2>/dev/null)
+ local tmpcnt=$(count_flvr "$output" $flavor)
+
+ if $GSS_SK && [ $flavor != "null" ]; then
+ # tmpcnt=min(contexts,flavors) to ensure SK context is on
+ output=$(do_node $c lctl get_param -n \
+ osc.*OST*-osc-[^M][^D][^T]*.$PROC_CON 2>/dev/null)
+ local outcon=$(count_contexts "$output")
+
+ if [ "$outcon" -lt "$tmpcnt" ]; then
+ tmpcnt=$outcon
+ fi
fi
- fi
- cnt=$((cnt + tmpcnt))
- done
- echo $cnt
+ cnt=$((cnt + tmpcnt))
+ done
+ echo $cnt
}
-flvr_cnt_mdt2mdt()
+flvr_dump_cli2ost()
{
- local flavor=$1
- local cnt=0
+ local clients=${CLIENTS:-$HOSTNAME}
- if [ $MDSCOUNT -le 1 ]; then
- echo 0
- return
- fi
+ for c in ${clients//,/ }; do
+ do_node $c lctl get_param \
+ osc.*OST*-osc-[^M][^D][^T]*.$PROC_CLI 2>/dev/null
- for num in `seq $MDSCOUNT`; do
- local output=$(do_facet mds$num lctl get_param -n \
- osp.*-MDT*osp-MDT*.$PROC_CLI 2>/dev/null)
- local tmpcnt=$(count_flvr "$output" $flavor)
- if $GSS_SK && [ $flavor != "null" ]; then
- # tmpcnt=min(contexts,flavors) to ensure SK context is on
- output=$(do_facet mds$num lctl get_param -n \
- osp.*-MDT*osp-MDT*.$PROC_CON 2>/dev/null)
- local outcon=$(count_contexts "$output")
- if [ "$outcon" -lt "$tmpcnt" ]; then
- tmpcnt=$outcon
+ if $GSS_SK; then
+ do_node $c lctl get_param \
+ osc.*OST*-osc-[^M][^D][^T]*.$PROC_CON 2>/dev/null
fi
+ done
+}
+
+flvr_cnt_mdt2mdt()
+{
+ local flavor=$1
+ local cnt=0
+
+ if [ $MDSCOUNT -le 1 ]; then
+ echo 0
+ return
fi
- cnt=$((cnt + tmpcnt))
- done
- echo $cnt;
+
+ for num in `seq $MDSCOUNT`; do
+ local output=$(do_facet mds$num lctl get_param -n \
+ osp.*-MDT*osp-MDT*.$PROC_CLI 2>/dev/null)
+ local tmpcnt=$(count_flvr "$output" $flavor)
+
+ if $GSS_SK && [ $flavor != "null" ]; then
+ # tmpcnt=min(contexts,flavors) to ensure SK context is on
+ output=$(do_facet mds$num lctl get_param -n \
+ osp.*-MDT*osp-MDT*.$PROC_CON 2>/dev/null)
+ local outcon=$(count_contexts "$output")
+
+ if [ "$outcon" -lt "$tmpcnt" ]; then
+ tmpcnt=$outcon
+ fi
+ fi
+ cnt=$((cnt + tmpcnt))
+ done
+ echo $cnt;
+}
+
+flvr_dump_mdt2mdt()
+{
+ for num in `seq $MDSCOUNT`; do
+ do_facet mds$num lctl get_param \
+ osp.*-MDT*osp-MDT*.$PROC_CLI 2>/dev/null
+
+ if $GSS_SK; then
+ do_facet mds$num lctl get_param \
+ osp.*-MDT*osp-MDT*.$PROC_CON 2>/dev/null
+ fi
+ done
}
flvr_cnt_mdt2ost()
{
- local flavor=$1
- local cnt=0
- local mdtosc
-
- for num in `seq $MDSCOUNT`; do
- mdtosc=$(get_mdtosc_proc_path mds$num)
- mdtosc=${mdtosc/-MDT*/-MDT\*}
- local output=$(do_facet mds$num lctl get_param -n \
- os[cp].$mdtosc.$PROC_CLI 2>/dev/null)
- local tmpcnt=$(count_flvr "$output" $flavor)
- if $GSS_SK && [ $flavor != "null" ]; then
- # tmpcnt=min(contexts,flavors) to ensure SK context is on
- output=$(do_facet mds$num lctl get_param -n \
- os[cp].$mdtosc.$PROC_CON 2>/dev/null)
- local outcon=$(count_contexts "$output")
- if [ "$outcon" -lt "$tmpcnt" ]; then
- tmpcnt=$outcon
+ local flavor=$1
+ local cnt=0
+ local mdtosc
+
+ for num in `seq $MDSCOUNT`; do
+ mdtosc=$(get_mdtosc_proc_path mds$num)
+ mdtosc=${mdtosc/-MDT*/-MDT\*}
+ local output=$(do_facet mds$num lctl get_param -n \
+ os[cp].$mdtosc.$PROC_CLI 2>/dev/null)
+		# tmpcnt=min(contexts,flavors) to ensure SK context is on
+ local tmpcnt=$(count_flvr "$output" $flavor)
+
+ if $GSS_SK && [ $flavor != "null" ]; then
+ output=$(do_facet mds$num lctl get_param -n \
+ os[cp].$mdtosc.$PROC_CON 2>/dev/null)
+ local outcon=$(count_contexts "$output")
+
+ if [ "$outcon" -lt "$tmpcnt" ]; then
+ tmpcnt=$outcon
+ fi
fi
- fi
- cnt=$((cnt + tmpcnt))
- done
- echo $cnt;
+ cnt=$((cnt + tmpcnt))
+ done
+ echo $cnt;
+}
+
+flvr_dump_mdt2ost()
+{
+ for num in `seq $MDSCOUNT`; do
+ mdtosc=$(get_mdtosc_proc_path mds$num)
+ mdtosc=${mdtosc/-MDT*/-MDT\*}
+ do_facet mds$num lctl get_param \
+ os[cp].$mdtosc.$PROC_CLI 2>/dev/null
+
+ if $GSS_SK; then
+ do_facet mds$num lctl get_param \
+ os[cp].$mdtosc.$PROC_CON 2>/dev/null
+ fi
+ done
}
flvr_cnt_mgc2mgs()
{
- local flavor=$1
+ local flavor=$1
- local output=$(do_facet client lctl get_param -n mgc.*.$PROC_CLI \
+ local output=$(do_facet client lctl get_param -n mgc.*.$PROC_CLI \
2>/dev/null)
- count_flvr "$output" $flavor
+ count_flvr "$output" $flavor
}
do_check_flavor()
{
- local dir=$1 # from to
- local flavor=$2 # flavor expected
- local res=0
-
- if [ $dir == "cli2mdt" ]; then
- res=`flvr_cnt_cli2mdt $flavor`
- elif [ $dir == "cli2ost" ]; then
- res=`flvr_cnt_cli2ost $flavor`
- elif [ $dir == "mdt2mdt" ]; then
- res=`flvr_cnt_mdt2mdt $flavor`
- elif [ $dir == "mdt2ost" ]; then
- res=`flvr_cnt_mdt2ost $flavor`
- elif [ $dir == "all2ost" ]; then
- res1=`flvr_cnt_mdt2ost $flavor`
- res2=`flvr_cnt_cli2ost $flavor`
- res=$((res1 + res2))
- elif [ $dir == "all2mdt" ]; then
- res1=`flvr_cnt_mdt2mdt $flavor`
- res2=`flvr_cnt_cli2mdt $flavor`
- res=$((res1 + res2))
- elif [ $dir == "all2all" ]; then
- res1=`flvr_cnt_mdt2ost $flavor`
- res2=`flvr_cnt_cli2ost $flavor`
- res3=`flvr_cnt_mdt2mdt $flavor`
- res4=`flvr_cnt_cli2mdt $flavor`
- res=$((res1 + res2 + res3 + res4))
- fi
+ local dir=$1 # from to
+ local flavor=$2 # flavor expected
+ local res=0
+
+ if [ $dir == "cli2mdt" ]; then
+ res=`flvr_cnt_cli2mdt $flavor`
+ elif [ $dir == "cli2ost" ]; then
+ res=`flvr_cnt_cli2ost $flavor`
+ elif [ $dir == "mdt2mdt" ]; then
+ res=`flvr_cnt_mdt2mdt $flavor`
+ elif [ $dir == "mdt2ost" ]; then
+ res=`flvr_cnt_mdt2ost $flavor`
+ elif [ $dir == "all2ost" ]; then
+ res1=`flvr_cnt_mdt2ost $flavor`
+ res2=`flvr_cnt_cli2ost $flavor`
+ res=$((res1 + res2))
+ elif [ $dir == "all2mdt" ]; then
+ res1=`flvr_cnt_mdt2mdt $flavor`
+ res2=`flvr_cnt_cli2mdt $flavor`
+ res=$((res1 + res2))
+ elif [ $dir == "all2all" ]; then
+ res1=`flvr_cnt_mdt2ost $flavor`
+ res2=`flvr_cnt_cli2ost $flavor`
+ res3=`flvr_cnt_mdt2mdt $flavor`
+ res4=`flvr_cnt_cli2mdt $flavor`
+ res=$((res1 + res2 + res3 + res4))
+ fi
- echo $res
+ echo $res
+}
+
+do_dump_imp_state()
+{
+ local clients=${CLIENTS:-$HOSTNAME}
+ local type=$1
+
+ for c in ${clients//,/ }; do
+ [ "$type" == "osc" ] &&
+ do_node $c lctl get_param osc.*.idle_timeout
+ do_node $c lctl get_param $type.*.import |
+ grep -E "name:|state:"
+ done
+}
+
+do_dump_flavor()
+{
+ local dir=$1 # from to
+
+ if [ $dir == "cli2mdt" ]; then
+ do_dump_imp_state mdc
+ flvr_dump_cli2mdt
+ elif [ $dir == "cli2ost" ]; then
+ do_dump_imp_state osc
+ flvr_dump_cli2ost
+ elif [ $dir == "mdt2mdt" ]; then
+ flvr_dump_mdt2mdt
+ elif [ $dir == "mdt2ost" ]; then
+ flvr_dump_mdt2ost
+ elif [ $dir == "all2ost" ]; then
+ flvr_dump_mdt2ost
+ do_dump_imp_state osc
+ flvr_dump_cli2ost
+ elif [ $dir == "all2mdt" ]; then
+ flvr_dump_mdt2mdt
+ do_dump_imp_state mdc
+ flvr_dump_cli2mdt
+ elif [ $dir == "all2all" ]; then
+ flvr_dump_mdt2ost
+ do_dump_imp_state osc
+ flvr_dump_cli2ost
+ flvr_dump_mdt2mdt
+ do_dump_imp_state mdc
+ flvr_dump_cli2mdt
+ fi
}
wait_flavor()
done
echo "Error checking $flavor of $dir: expect $expect, actual $res"
-# echo "Dumping additional logs for SK debug.."
do_nodes $(comma_list $(all_server_nodes)) "keyctl show"
+ do_dump_flavor $dir
if $dump; then
gather_logs $(comma_list $(nodes_list))
fi
fi
echo "GSS_SK now at flavor: $flavor"
else
- set_rule $FSNAME any any $flavor
- wait_flavor all2all $flavor
+ set_rule $FSNAME any cli2mdt $flavor
+ set_rule $FSNAME any cli2ost $flavor
+ set_rule $FSNAME any mdt2ost null
+ set_rule $FSNAME any mdt2mdt null
+ wait_flavor cli2mdt $flavor
+ wait_flavor cli2ost $flavor
fi
}
check_logdir() {
- local dir=$1
- # Checking for shared logdir
- if [ ! -d $dir ]; then
- # Not found. Create local logdir
- mkdir -p $dir
- else
- touch $dir/check_file.$(hostname -s)
- fi
- return 0
+ local dir=$1
+ # Checking for shared logdir
+ if [ ! -d $dir ]; then
+ # Not found. Create local logdir
+ mkdir -p $dir
+ else
+ touch $dir/check_file.$(hostname -s)
+ fi
+ return 0
}
check_write_access() {
# log actual client and server versions if needed for debugging
log "Client: $(lustre_build_version client)"
+ lustre_os_release echo client
log "MDS: $(lustre_build_version mds1)"
+ lustre_os_release echo mds1
log "OSS: $(lustre_build_version ost1)"
+ lustre_os_release echo ost1
}
log_test() {
- yml_log_test $1 >> $YAML_LOG
+ yml_log_test $1 >> $YAML_LOG
}
log_test_status() {
- yml_log_test_status $@ >> $YAML_LOG
+ yml_log_test_status "$@" >> $YAML_LOG
}
log_sub_test_begin() {
- yml_log_sub_test_begin "$@" >> $YAML_LOG
+ yml_log_sub_test_begin "$@" >> $YAML_LOG
}
log_sub_test_end() {
- yml_log_sub_test_end "$@" >> $YAML_LOG
+ yml_log_sub_test_end "$@" >> $YAML_LOG
}
run_llverdev()
{
- local dev=$1
- local llverdev_opts=$2
- local devname=$(basename $1)
- local size=$(grep "$devname"$ /proc/partitions | awk '{print $3}')
- # loop devices aren't in /proc/partitions
- [ "x$size" == "x" ] && local size=$(ls -l $dev | awk '{print $5}')
+ local dev=$1; shift
+ local llverdev_opts="$*"
+ local devname=$(basename $dev)
+ local size=$(awk "/$devname$/ {print \$3}" /proc/partitions)
+ # loop devices aren't in /proc/partitions
+ [[ -z "$size" ]] && size=$(stat -c %s $dev)
- size=$(($size / 1024 / 1024)) # Gb
+ local size_gb=$((size / 1024 / 1024)) # Gb
- local partial_arg=""
- # Run in partial (fast) mode if the size
- # of a partition > 1 GB
- [ $size -gt 1 ] && partial_arg="-p"
+ local partial_arg=""
+ # Run in partial (fast) mode if the size of a partition > 1 GB
+ (( $size == 0 || $size_gb > 1 )) && partial_arg="-p"
- llverdev --force $partial_arg $llverdev_opts $dev
+ llverdev --force $partial_arg $llverdev_opts $dev
}
run_llverfs()
{
- local dir=$1
- local llverfs_opts=$2
- local use_partial_arg=$3
- local partial_arg=""
- local size=$(df -B G $dir |tail -n 1 |awk '{print $2}' |sed 's/G//') #GB
-
- # Run in partial (fast) mode if the size
- # of a partition > 1 GB
- [ "x$use_partial_arg" != "xno" ] && [ $size -gt 1 ] && partial_arg="-p"
-
- llverfs $partial_arg $llverfs_opts $dir
-}
-
-#Remove objects from OST
-remove_ost_objects() {
- local facet=$1
- local ostdev=$2
- local group=$3
- shift 3
- local objids="$@"
- local mntpt=$(facet_mntpt $facet)
- local opts=$OST_MOUNT_OPTS
- local i
- local rc
-
- echo "removing objects from $ostdev on $facet: $objids"
- if ! test -b $ostdev; then
- opts=$(csa_add "$opts" -o loop)
- fi
- mount -t $(facet_fstype $facet) $opts $ostdev $mntpt ||
- return $?
- rc=0
- for i in $objids; do
- rm $mntpt/O/$group/d$((i % 32))/$i || { rc=$?; break; }
- done
- umount -f $mntpt || return $?
- return $rc
-}
-
-#Remove files from MDT
-remove_mdt_files() {
- local facet=$1
- local mdtdev=$2
- shift 2
- local files="$@"
- local mntpt=$(facet_mntpt $facet)
- local opts=$MDS_MOUNT_OPTS
-
- echo "removing files from $mdtdev on $facet: $files"
- if [ $(facet_fstype $facet) == ldiskfs ] &&
- ! do_facet $facet test -b $mdtdev; then
- opts=$(csa_add "$opts" -o loop)
- fi
- mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt ||
- return $?
- rc=0
- for f in $files; do
- rm $mntpt/ROOT/$f || { rc=$?; break; }
- done
- umount -f $mntpt || return $?
- return $rc
-}
-
-duplicate_mdt_files() {
- local facet=$1
- local mdtdev=$2
- shift 2
- local files="$@"
- local mntpt=$(facet_mntpt $facet)
- local opts=$MDS_MOUNT_OPTS
+ local dir=$1
+ local llverfs_opts=$2
+ local use_partial_arg=$3
+ local partial_arg=""
+ local size=$(df -B G $dir |tail -n 1 |awk '{print $2}' |sed 's/G//') #GB
- echo "duplicating files on $mdtdev on $facet: $files"
- mkdir -p $mntpt || return $?
- if [ $(facet_fstype $facet) == ldiskfs ] &&
- ! do_facet $facet test -b $mdtdev; then
- opts=$(csa_add "$opts" -o loop)
- fi
- mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt ||
- return $?
+ # Run in partial (fast) mode if the size of a partition > 1 GB
+ [ "x$use_partial_arg" != "xno" ] && [ $size -gt 1 ] && partial_arg="-p"
- do_umount() {
- trap 0
- popd > /dev/null
- rm $tmp
- umount -f $mntpt
- }
- trap do_umount EXIT
-
- tmp=$(mktemp $TMP/setfattr.XXXXXXXXXX)
- pushd $mntpt/ROOT > /dev/null || return $?
- rc=0
- for f in $files; do
- touch $f.bad || return $?
- getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $tmp
- rc=${PIPESTATUS[0]}
- [ $rc -eq 0 ] || return $rc
- setfattr --restore $tmp || return $?
- done
- do_umount
+ llverfs $partial_arg $llverfs_opts $dir
}
run_sgpdd () {
- local devs=${1//,/ }
- shift
- local params=$@
- local rslt=$TMP/sgpdd_survey
+ local devs=${1//,/ }
+ shift
+ local params=$@
+ local rslt=$TMP/sgpdd_survey
- # sgpdd-survey cleanups ${rslt}.* files
+ # sgpdd-survey cleanups ${rslt}.* files
- local cmd="rslt=$rslt $params scsidevs=\"$devs\" $SGPDDSURVEY"
- echo + $cmd
- eval $cmd
- cat ${rslt}.detail
+ local cmd="rslt=$rslt $params scsidevs=\"$devs\" $SGPDDSURVEY"
+ echo + $cmd
+ eval $cmd
+ cat ${rslt}.detail
}
# returns the canonical name for an ldiskfs device
}
is_sanity_benchmark() {
- local benchmarks="dbench bonnie iozone fsx"
- local suite=$1
- for b in $benchmarks; do
- if [ "$b" == "$suite" ]; then
- return 0
- fi
- done
- return 1
+ local benchmarks="dbench bonnie iozone fsx"
+ local suite=$1
+
+ for b in $benchmarks; do
+ if [ "$b" == "$suite" ]; then
+ return 0
+ fi
+ done
+ return 1
}
min_ost_size () {
local device=$2
local count
- [ -z "$CLIENTONLY" ] && count=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" |
- awk '/^Block count:/ {print $3}')
+ [ -z "$CLIENTONLY" ] &&
+ count=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" |
+ awk '/^Block count:/ {print $3}')
echo -n ${count:-0}
}
-# Get the block size of the filesystem.
-get_block_size() {
- local facet=$1
- local device=$2
- local size
-
- [ -z "$CLIENTONLY" ] && size=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" |
- awk '/^Block size:/ {print $3}')
- echo -n ${size:-0}
-}
-
# Check whether the "ea_inode" feature is enabled or not, to allow
# ldiskfs xattrs over one block in size. Allow both the historical
# Lustre feature name (large_xattr) and the upstream name (ea_inode).
large_xattr_enabled() {
- [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 1
+ [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 0
local mds_dev=$(mdsdevname ${SINGLEMDS//mds/})
# Dump the value of the named xattr from a file.
get_xattr_value() {
- local xattr_name=$1
- local file=$2
+ local xattr_name=$1
+ local file=$2
- echo "$(getfattr -n $xattr_name --absolute-names --only-values $file)"
+ echo "$(getfattr -n $xattr_name --absolute-names --only-values $file)"
}
# Generate a string with size of $size bytes.
generate_string() {
- local size=${1:-1024} # in bytes
+ local size=${1:-1024} # in bytes
- echo "$(head -c $size < /dev/zero | tr '\0' y)"
+ echo "$(head -c $size < /dev/zero | tr '\0' y)"
}
reformat_external_journal() {
local var
var=${facet}_JRN
+ local varbs=${facet}_BLOCKSIZE
if [ -n "${!var}" ]; then
local rcmd="do_facet $facet"
+ local bs=${!varbs:-$BLCKSIZE}
+ bs="-b $bs"
echo "reformat external journal on $facet:${!var}"
- ${rcmd} mke2fs -O journal_dev ${!var} || return 1
+ ${rcmd} mke2fs -O journal_dev $bs ${!var} || return 1
fi
}
local devname=$(mdsdevname $(facet_number $facet))
local mntpt=$(facet_mntpt brpt)
local rcmd="do_facet $facet"
- local metaea=${TMP}/backup_restore.ea
local metadata=${TMP}/backup_restore.tgz
local opts=${MDS_MOUNT_FS_OPTS}
local svc=${facet}_svc
# step 1: build mount point
${rcmd} mkdir -p $mntpt
# step 2: cleanup old backup
- ${rcmd} rm -f $metaea $metadata
+ ${rcmd} rm -f $metadata
# step 3: mount dev
- ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 1
+ ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 3
if [ ! -z $igif ]; then
# step 3.5: rm .lustre
- ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 1
+ ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 3
fi
- # step 4: backup metaea
- echo "backup EA"
- ${rcmd} "cd $mntpt && getfattr -R -d -m '.*' -P . > $metaea && cd -" ||
- return 2
- # step 5: backup metadata
+ # step 4: backup metadata
echo "backup data"
- ${rcmd} tar zcf $metadata -C $mntpt/ . > /dev/null 2>&1 || return 3
- # step 6: umount
- ${rcmd} $UMOUNT $mntpt || return 4
- # step 8: reformat dev
+ ${rcmd} tar zcf $metadata --xattrs --xattrs-include="trusted.*" \
+ --sparse -C $mntpt/ . > /dev/null 2>&1 || return 4
+ # step 5: umount
+ ${rcmd} $UMOUNT $mntpt || return 5
+ # step 6: reformat dev
echo "reformat new device"
format_mdt $(facet_number $facet)
- # step 9: mount dev
+ # step 7: mount dev
${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 7
- # step 10: restore metadata
+ # step 8: restore metadata
echo "restore data"
- ${rcmd} tar zxfp $metadata -C $mntpt > /dev/null 2>&1 || return 8
- # step 11: restore metaea
- echo "restore EA"
- ${rcmd} "cd $mntpt && setfattr --restore=$metaea && cd - " || return 9
- # step 12: remove recovery logs
+ ${rcmd} tar zxfp $metadata --xattrs --xattrs-include="trusted.*" \
+ --sparse -C $mntpt > /dev/null 2>&1 || return 8
+ # step 9: remove recovery logs
echo "remove recovery logs"
${rcmd} rm -fv $mntpt/OBJECTS/* $mntpt/CATALOGS
- # step 13: umount dev
+ # step 10: umount dev
${rcmd} $UMOUNT $mntpt || return 10
- # step 14: cleanup tmp backup
+ # step 11: cleanup tmp backup
${rcmd} rm -f $metaea $metadata
- # step 15: reset device label - it's not virgin on
+ # step 12: reset device label - it's not virgin on
${rcmd} e2label $devname ${!svc}
}
local dirstripe_count=${DIRSTRIPE_COUNT:-"2"}
local dirstripe_index=${DIRSTRIPE_INDEX:-$((base % $MDSCOUNT))}
local OPTIND=1
+ local overstripe_count
+ local stripe_command="-c"
+
+ (( $MDS1_VERSION > $(version_code 2.15.0) )) &&
+ hash_name+=("crush2")
- while getopts "c:H:i:p" opt; do
+ while getopts "c:C:H:i:p" opt; do
case $opt in
c) dirstripe_count=$OPTARG;;
+ C) overstripe_count=$OPTARG;;
H) hash_type=$OPTARG;;
i) dirstripe_index=$OPTARG;;
p) p_option="-p";;
fi
fi
+ if [[ -n "$overstripe_count" ]]; then
+ stripe_command="-C"
+ dirstripe_count=$overstripe_count
+ fi
+
if [ $MDSCOUNT -le 1 ] || ! is_lustre ${parent}; then
mkdir $path || error "mkdir '$path' failed"
else
dirstripe_count=1
fi
- echo "striped dir -i$mdt_index -c$dirstripe_count -H $hash_type $path"
- $LFS mkdir -i$mdt_index -c$dirstripe_count -H $hash_type $path ||
- error "mkdir -i $mdt_index -c$dirstripe_count -H $hash_type $path failed"
+ echo "striped dir -i$mdt_index $stripe_command$dirstripe_count -H $hash_type $path"
+ $LFS mkdir -i$mdt_index $stripe_command$dirstripe_count -H $hash_type $path ||
+ error "mkdir -i $mdt_index $stripe_command$dirstripe_count -H $hash_type $path failed"
fi
}
is_mounted $MOUNT || setupall
rm -rf $DIR/[df][0-9]* || error "Fail to cleanup the env!"
- mkdir $DIR/$tdir || error "Fail to mkdir $DIR/$tdir."
+ mkdir_on_mdt0 $DIR/$tdir || error "Fail to mkdir $DIR/$tdir."
for idx in $(seq $MDSCOUNT); do
local name="MDT$(printf '%04x' $((idx - 1)))"
rm -rf $MOUNT/.lustre/lost+found/$name/*
if [ -z $last ]; then
local list=$first
+ last=$first
else
local list=$(seq $first $step $last)
fi
local t=$(for i in $list; do printf "$FSNAME-OST%04x_UUID " $i; done)
+ local tg=$(for i in $list;
+ do printf -- "-e $FSNAME-OST%04x_UUID " $i; done)
+ local firstx=$(printf "%04x" $first)
+ local lastx=$(printf "%04x" $last)
+
do_facet mgs $LCTL pool_add \
- $FSNAME.$pool $FSNAME-OST[$first-$last/$step]
+ $FSNAME.$pool $FSNAME-OST[$firstx-$lastx/$step]
+ # ignore EEXIST(17)
+ if (( $? != 0 && $? != 17 )); then
+ error_noexit "pool_add $FSNAME-OST[$firstx-$lastx/$step] failed"
+ return 3
+ fi
# wait for OSTs to be added to the pool
for mds_id in $(seq $MDSCOUNT); do
local lodname=$FSNAME-MDT$(printf "%04x" $mdt_id)-mdtlov
wait_update_facet mds$mds_id \
"lctl get_param -n lod.$lodname.pools.$pool |
- sort -u | tr '\n' ' ' " "$t" || {
+ grep $tg | sort -u | tr '\n' ' '" "$t" || {
error_noexit "mds$mds_id: Add to pool failed"
- return 3
+ return 2
}
done
- wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool \
- | sort -u | tr '\n' ' ' " "$t" || {
+ wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool |
+ grep $tg | sort -u | tr '\n' ' ' " "$t" || {
error_noexit "Add to pool failed"
return 1
}
- local lfscount=$($LFS pool_list $FSNAME.$pool | grep -c "\-OST")
- local addcount=$(((last - first) / step + 1))
- [ $lfscount -eq $addcount ] || {
- error_noexit "lfs pool_list bad ost count" \
- "$lfscount != $addcount"
- return 2
- }
}
pool_set_dir() {
echo "-c $val"
elif [[ $line =~ ^"lmm_stripe_size:" ]]; then
echo "-S $val"
- elif [[ $line =~ ^"lmm_stripe_offset:" ]]; then
+ elif [[ $line =~ ^"lmm_stripe_offset:" && $SKIP_INDEX != yes ]]; then
echo "-i $val"
elif [[ $line =~ ^"lmm_pattern:" ]]; then
echo "-L $val"
local rc=0
for osc in $oscs; do
- ((rc++))
echo "Check state for $osc"
local evicted=$(do_facet client $LCTL get_param osc.$osc.state |
- tail -n 3 | awk -F"[ [,]" \
- '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+ tail -n 5 | awk -F"[ ,]" \
+ '/EVICTED/ { if (mx<$4) { mx=$4; } } END { print mx }')
if (($? == 0)) && (($evicted > $before)); then
echo "$osc is evicted at $evicted"
- ((rc--))
+ else
+ ((rc++))
+ echo "$osc was not evicted after $before:"
+ do_facet client $LCTL get_param osc.$osc.state |
+ tail -n 8
fi
done
error "$mdt: changelog_mask=+hsm failed: $?"
local cl_user
- cl_user=$(do_facet $facet \
- $LCTL --device $mdt changelog_register -n) ||
+ cl_user=$(do_facet $facet $LCTL --device $mdt \
+ changelog_register -n "$@") ||
error "$mdt: register changelog user failed: $?"
stack_trap "__changelog_deregister $facet $cl_user" EXIT
# cl_user is constrained enough to use whitespaces as separators
CL_USERS[$facet]+="$cl_user "
done
- echo "Registered $MDSCOUNT changelog users: '${CL_USERS[@]% }'"
+ echo "Registered $MDSCOUNT changelog users: '${CL_USERS[*]% }'"
}
changelog_deregister() {
$LFS changelog_clear $mdt $cl_user $rec
}
-# usage: changelog_clear [+]INDEX
+# usage: changelog_clear [+]INDEX [facet]...
#
# If INDEX is prefixed with '+', increment every changelog user's record index
# by INDEX. Otherwise, clear the changelog up to INDEX for every changelog
# users.
changelog_clear() {
local rc
+ local idx=$1
+ shift
+ local cl_facets="$@"
# bash assoc arrays do not guarantee to list keys in created order
# so reorder to get same order than in changelog_register()
- local cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort |
- tr "\n" " ")
+ [[ -n "$cl_facets" ]] ||
+ cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort |
+ tr "\n" " ")
+ local cl_user
for facet in $cl_facets; do
for cl_user in ${CL_USERS[$facet]}; do
- __changelog_clear $facet $cl_user $1 || rc=${rc:-$?}
+ __changelog_clear $facet $cl_user $idx || rc=${rc:-$?}
done
done
is_project_quota_supported() {
$ENABLE_PROJECT_QUOTAS || return 1
+ [[ -z "$SAVE_PROJECT_SUPPORTED" ]] || return $SAVE_PROJECT_SUPPORTED
+ local save_project_supported=1
[[ "$(facet_fstype $SINGLEMDS)" == "ldiskfs" &&
$(lustre_version_code $SINGLEMDS) -gt $(version_code 2.9.55) ]] &&
- do_facet mds1 lfs --help |& grep -q project && return 0
+ do_facet mds1 lfs --list-commands |& grep -q project &&
+ save_project_supported=0
[[ "$(facet_fstype $SINGLEMDS)" == "zfs" &&
$(lustre_version_code $SINGLEMDS) -gt $(version_code 2.10.53) ]] &&
- do_facet mds1 $ZPOOL get all | grep -q project_quota && return 0
+ do_facet mds1 $ZPOOL get all | grep -q project_quota &&
+ save_project_supported=0
- return 1
+ # cache state of project quotas once instead of re-checking each time
+ export SAVE_PROJECT_SUPPORTED=$save_project_supported
+ echo "using SAVE_PROJECT_SUPPORTED=$SAVE_PROJECT_SUPPORTED"
+
+ return $save_project_supported
}
# ZFS project quota enable/disable:
export SINGLEAGT=${SINGLEAGT:-agt1}
export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
+ export HSMTOOL_PID_FILE=${HSMTOOL_PID_FILE:-"/var/run/lhsmtool_posix.pid"}
export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""}
export HSMTOOL_TESTDIR
- export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
+ export HSMTOOL_ARCHIVE_FORMAT=${HSMTOOL_ARCHIVE_FORMAT:-v2}
+
+ if ! [[ $HSMTOOL =~ hsmtool ]]; then
+ echo "HSMTOOL = '$HSMTOOL' does not contain 'hsmtool', GLWT" >&2
+ fi
HSM_ARCHIVE_NUMBER=2
done
}
-search_copytools() {
- local hosts=${1:-$(facet_active_host $SINGLEAGT)}
- do_nodesv $hosts "pgrep -x $HSMTOOL_BASE"
+pkill_copytools() {
+ local hosts="$1"
+ local signal="$2"
+
+ do_nodes "$hosts" \
+ "pkill --pidfile=$HSMTOOL_PID_FILE --signal=$signal hsmtool"
}
-kill_copytools() {
- local hosts=${1:-$(facet_active_host $SINGLEAGT)}
+copytool_continue() {
+ local agents=${1:-$(facet_active_host $SINGLEAGT)}
- echo "Killing existing copytools on $hosts"
- do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true
+ pkill_copytools "$agents" CONT || return 0
+ echo "Copytool is continued on $agents"
}
-wait_copytools() {
+kill_copytools() {
local hosts=${1:-$(facet_active_host $SINGLEAGT)}
- local wait_timeout=200
- local wait_start=$SECONDS
- local wait_end=$((wait_start + wait_timeout))
- local sleep_time=100000 # 0.1 second
-
- while ((SECONDS < wait_end)); do
- if ! search_copytools $hosts; then
- echo "copytools stopped in $((SECONDS - wait_start))s"
- return 0
- fi
-
- echo "copytools still running on $hosts"
- usleep $sleep_time
- [ $sleep_time -lt 32000000 ] && # 3.2 seconds
- sleep_time=$(bc <<< "$sleep_time * 2")
- done
- # try to dump Copytool's stack
- do_nodesv $hosts "echo 1 >/proc/sys/kernel/sysrq ; " \
- "echo t >/proc/sysrq-trigger"
-
- echo "copytools failed to stop in ${wait_timeout}s"
-
- return 1
+ echo "Killing existing copytools on $hosts"
+ pkill_copytools "$hosts" TERM || return 0
+ copytool_continue "$hosts"
}
copytool_monitor_cleanup() {
__lhsmtool_rebind()
{
- do_facet $facet $HSMTOOL -p "$hsm_root" --rebind "$@" "$mountpoint"
+ do_facet $facet $HSMTOOL \
+ "${hsmtool_options[@]}" --rebind "$@" "$mountpoint"
}
__lhsmtool_import()
{
mkdir -p "$(dirname "$2")" ||
error "cannot create directory '$(dirname "$2")'"
- do_facet $facet $HSMTOOL -p "$hsm_root" --import "$@" "$mountpoint"
+ do_facet $facet $HSMTOOL \
+ "${hsmtool_options[@]}" --import "$@" "$mountpoint"
}
__lhsmtool_setup()
{
- local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root \"$hsm_root\""
+ local host="$(facet_host "$facet")"
+ local cmd="$HSMTOOL ${hsmtool_options[@]} --daemon --pid-file=$HSMTOOL_PID_FILE"
+
[ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth"
[ -n "$archive_id" ] && cmd+=" --archive $archive_id"
- [ ${#misc_options[@]} -gt 0 ] &&
- cmd+=" $(IFS=" " echo "$@")"
- cmd+=" \"$mountpoint\""
+ # [ ${#misc_options[@]} -gt 0 ] &&
+# cmd+=" $(IFS=" " echo "$@")"
+ cmd+=" $@ \"$mountpoint\""
- echo "Starting copytool $facet on $(facet_host $facet)"
- stack_trap "do_facet $facet libtool execute pkill -x '$HSMTOOL' || true" EXIT
- do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1"
+ echo "Starting copytool '$facet' on '$host' with cmdline '$cmd'"
+ stack_trap "pkill_copytools $host TERM || true" EXIT
+ do_node "$host" "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1"
}
hsm_root() {
# Use default values
local facet=$SINGLEAGT
local mountpoint="${MOUNT2:-$MOUNT}"
- local hsm_root="${hsm_root:-$(hsm_root "$facet")}"
# Parse arguments
local fail_on_error=true
- local -a misc_options
+ local -a hsmtool_options=()
+ local -a action_options=()
+
+ if [[ -n "$HSMTOOL_ARCHIVE_FORMAT" ]]; then
+ hsmtool_options+=("--archive-format=$HSMTOOL_ARCHIVE_FORMAT")
+ fi
+
+ if [[ -n "$HSMTOOL_VERBOSE" ]]; then
+ hsmtool_options+=("$HSMTOOL_VERBOSE")
+ fi
+
while [ $# -gt 0 ]; do
case "$1" in
-f|--facet)
;;
-h|--hsm-root)
shift
- hsm_root="$1"
+ local hsm_root="$1"
;;
-b|--bwlimit)
shift
;;
*)
# Uncommon(/copytool dependent) option
- misc_options+=("$1")
+ action_options+=("$1")
;;
esac
shift
done
+ local hsm_root="${hsm_root:-$(hsm_root "$facet")}"
+ hsmtool_options+=("--hsm-root=$hsm_root")
+
stack_trap "do_facet $facet rm -rf '$hsm_root'" EXIT
do_facet $facet mkdir -p "$hsm_root" ||
error "mkdir '$hsm_root' failed"
;;
esac
- __${copytool}_${action} "${misc_options[@]}"
+ __${copytool}_${action} "${action_options[@]}"
if [ $? -ne 0 ]; then
local error_msg
error_msg="Failed to start copytool $facet on '$host'"
;;
import)
- local src="${misc_options[0]}"
- local dest="${misc_options[1]}"
+ local src="${action_options[0]}"
+ local dest="${action_options[1]}"
error_msg="Failed to import '$src' to '$dest'"
;;
rebind)
local mdtno
local rc=0
if [[ "$value" != "" ]]; then
- value="=$value"
+ value="='$value'"
fi
for mdtno in $(seq 1 $MDSCOUNT); do
local idx=$(($mdtno - 1))
return $rc
}
-wait_result() {
- local facet=$1
- shift
- wait_update --verbose $(facet_active_host $facet) "$@"
-}
-
mdts_check_param() {
local key="$1"
local target="$2"
local timeout="$3"
local mdtno
+
for mdtno in $(seq 1 $MDSCOUNT); do
local idx=$(($mdtno - 1))
- wait_result mds${mdtno} \
+ wait_update_facet --verbose mds${mdtno} \
"$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \
$timeout ||
error "$key state is not '$target' on mds${mdtno}"
local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions"
cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
- wait_result $mds "$cmd" "$state" 200 ||
+ wait_update_facet --verbose $mds "$cmd" "$state" 200 ||
error "request on $fid is not $state on $mds"
}
}
sleep_maxage() {
- local delay=$(do_facet $SINGLEMDS lctl get_param -n lo[vd].*.qos_maxage |
- awk '{ print $1 * 2; exit; }')
+ local delay=$(do_facet mds1 lctl get_param -n lod.*.qos_maxage |
+ awk '{ print $1 + 5; exit; }')
+ sleep $delay
+}
+
+sleep_maxage_lmv() {
+ local delay=$(lctl get_param -n lmv.*.qos_maxage |
+ awk '{ print $1 + 5; exit; }')
sleep $delay
}
$STATX --quiet --version
return $?
}
+
+# lfs rm_entry is disabled on native client
+is_rmentry_supported() {
+ $LFS rm_entry $DIR/dir/not/exists > /dev/null
+ # is return code ENOENT?
+ (( $? == 2 ))
+}
+
+#
+# Wrappers for createmany and unlinkmany that temporarily set debug=0
+# when the number of files being created or unlinked is high enough.
+# This is done to speed up testing.
+#
+function createmany() {
+ local count=${!#}
+ local rc
+
+ if (( count > 100 )); then
+ debugsave
+ do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0
+ fi
+ $LUSTRE/tests/createmany $*
+ rc=$?
+ debugrestore > /dev/null
+
+ return $rc
+}
+
+function unlinkmany() {
+ local count=${!#}
+ local rc
+
+ if (( count > 100 )); then
+ debugsave
+ do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0
+ fi
+ $LUSTRE/tests/unlinkmany $*
+ rc=$?
+ debugrestore > /dev/null
+
+ return $rc
+}
+
+# Check if fallocate on facet is working. Returns fallocate mode if enabled.
+# Takes optional facet name as argument, to allow separate MDS/OSS checks.
+function check_fallocate_supported()
+{
+ local facet=${1:-ost1}
+ local supported="FALLOCATE_SUPPORTED_$facet"
+ local fstype="${facet}_FSTYPE"
+
+ if [[ -n "${!supported}" ]]; then
+ echo "${!supported}"
+ return 0
+ fi
+ if [[ -z "${!fstype}" ]]; then
+ eval export $fstype=$(facet_fstype $facet)
+ fi
+ if [[ "${!fstype}" != "ldiskfs" ]]; then
+ echo "fallocate on ${!fstype} doesn't consume space" 1>&2
+ return 1
+ fi
+
+ local fa_mode="osd-ldiskfs.$(facet_svc $facet).fallocate_zero_blocks"
+ local mode=$(do_facet $facet $LCTL get_param -n $fa_mode 2>/dev/null |
+ head -n 1)
+
+ if [[ -z "$mode" ]]; then
+ echo "fallocate not supported on $facet" 1>&2
+ return 1
+ fi
+ eval export $supported="$mode"
+
+ echo ${!supported}
+ return 0
+}
+
+# Check if fallocate supported on OSTs, enable if unset, skip if unavailable.
+# Takes optional facet name as argument.
+function check_fallocate_or_skip()
+{
+ local facet=$1
+
+ check_fallocate_supported $1 || skip "fallocate not supported"
+}
+
+# Check if fallocate supported on OSTs, enable if unset, default mode=0
+# Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents)
+function check_set_fallocate()
+{
+ local new_mode="$1"
+ local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks"
+ local old_mode="$(check_fallocate_supported)"
+
+ [[ -n "$old_mode" ]] || { echo "fallocate not supported"; return 1; }
+ [[ -z "$new_mode" && "$old_mode" != "-1" ]] &&
+ { echo "keep default fallocate mode: $old_mode"; return 0; }
+ [[ "$new_mode" && "$old_mode" == "$new_mode" ]] &&
+ { echo "keep current fallocate mode: $old_mode"; return 0; }
+ local osts=$(comma_list $(osts_nodes))
+
+ stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode"
+ do_nodes $osts $LCTL set_param $fa_mode=${new_mode:-0} ||
+ error "set $fa_mode=$new_mode"
+}
+
+# Check if fallocate supported on OSTs, enable if unset, skip if unavailable
+function check_set_fallocate_or_skip()
+{
+ check_set_fallocate || skip "need >= 2.13.57 and ldiskfs for fallocate"
+}
+
+function disable_opencache()
+{
+ local state=$($LCTL get_param -n "llite.*.opencache_threshold_count" |
+ head -1)
+
+ test -z "${saved_OPENCACHE_value}" &&
+ export saved_OPENCACHE_value="$state"
+
+ [[ "$state" = "off" ]] && return
+
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=off
+}
+
+function set_opencache()
+{
+ local newvalue="$1"
+ local state=$($LCTL get_param -n "llite.*.opencache_threshold_count")
+
+ [[ -n "$newvalue" ]] || return
+
+ [[ -n "${saved_OPENCACHE_value}" ]] ||
+ export saved_OPENCACHE_value="$state"
+
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=$newvalue
+}
+
+
+
+function restore_opencache()
+{
+ [[ -z "${saved_OPENCACHE_value}" ]] ||
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=${saved_OPENCACHE_value}
+}
+
+# LU-13417: XXX lots of tests assume their directories are created under MDT0.
+# Use this function to create a directory on a specific MDT explicitly, and
+# set the default LMV to create subdirectories on the same MDT too.
+mkdir_on_mdt() {
+ local mdt
+ local OPTIND=1
+
+ while getopts "i:" opt $*; do
+ case $opt in
+ i) mdt=$OPTARG;;
+ esac
+ done
+
+ shift $((OPTIND - 1))
+
+ $LFS mkdir -i $mdt -c 1 $*
+}
+
+mkdir_on_mdt0() {
+ mkdir_on_mdt -i0 $*
+}
+
+# Wait until all server nodes have synchronized a nodemap setting with
+# the MGS.
+# Arguments:
+#   $1 - nodemap name, or "active" to wait on the nodemap activation flag
+#   $2 - parameter key below the nodemap (empty: compare whole nodemap)
+#   $3 - expected value (empty: use whatever the MGS currently reports)
+#   $4 - extra option for "lctl get_param", or the literal "inactive" to
+#        force the check even when the nodemap feature is not activated
+# Aborts via error() if the servers do not sync within the retry window.
+wait_nm_sync() {
+	local nodemap_name=$1
+	local key=$2
+	local value=$3
+	local opt=$4
+	local proc_param
+	local is_active=$(do_facet mgs $LCTL get_param -n nodemap.active)
+	# NOTE(review): max_retries is currently unused; the loops below
+	# use fixed 10-iteration bounds
+	local max_retries=20
+	local is_sync
+	local out1=""
+	local out2
+	local mgs_ip=$(host_nids_address $mgs_HOST $NETTYPE | cut -d' ' -f1)
+	local i
+
+	if [ "$nodemap_name" == "active" ]; then
+		proc_param="active"
+	elif [ -z "$key" ]; then
+		proc_param=${nodemap_name}
+	else
+		proc_param="${nodemap_name}.${key}"
+	fi
+	if [ "$opt" == "inactive" ]; then
+		# check nm sync even if nodemap is not activated
+		is_active=1
+		opt=""
+	fi
+	# nothing propagates while nodemaps are inactive, except the
+	# "active" flag itself
+	(( is_active == 0 )) && [ "$proc_param" != "active" ] && return
+
+	if [ -z "$value" ]; then
+		# no expected value given: use the current MGS view as reference
+		out1=$(do_facet mgs $LCTL get_param $opt \
+			nodemap.${proc_param} 2>/dev/null)
+		echo "On MGS ${mgs_ip}, ${proc_param} = $out1"
+	else
+		out1=$value;
+	fi
+
+	# if servers run on the same node, it is impossible to tell if they get
+	# synced with the mgs, so just wait an arbitrary 10 seconds
+	if [ $(facet_active_host mgs) == $(facet_active_host mds) ] &&
+	   [ $(facet_active_host mgs) == $(facet_active_host ost1) ]; then
+		echo "waiting 10 secs for sync"
+		sleep 10
+		return
+	fi
+
+	# wait up to 10 seconds for other servers to sync with mgs
+	for i in $(seq 1 10); do
+		for node in $(all_server_nodes); do
+			local node_ip=$(host_nids_address $node $NETTYPE |
+				cut -d' ' -f1)
+
+			is_sync=true
+			if [ -z "$value" ]; then
+				# reference came from the MGS itself; skip it
+				[ $node_ip == $mgs_ip ] && continue
+			fi
+
+			out2=$(do_node $node $LCTL get_param $opt \
+				nodemap.$proc_param 2>/dev/null)
+			echo "On $node ${node_ip}, ${proc_param} = $out2"
+			[ "$out1" != "$out2" ] && is_sync=false && break
+		done
+		$is_sync && break
+		sleep 1
+	done
+	# NOTE(review): is_sync is only assigned inside the node loop;
+	# assumes all_server_nodes returns at least one node — confirm
+	if ! $is_sync; then
+		echo MGS
+		echo $out1
+		echo OTHER - IP: $node_ip
+		echo $out2
+		error "mgs and $nodemap_name ${key} mismatch, $i attempts"
+	fi
+	echo "waited $((i - 1)) seconds for sync"
+}
+
+# Consume all precreated objects on one OST via a given MDT, forcing the
+# MDT's OSP to request a fresh precreate batch from the OST.
+# Arguments:
+#   $1 - parent directory; a subdir named after the OST is created in it
+#   $2 - MDS facet (e.g. mds1) whose OSP precreate window is drained
+#   $3 - target OST index
+#   $4 - extra objects to create beyond prealloc_last_id (default 2)
+consume_precreations() {
+	local dir=$1
+	local mfacet=$2
+	local OSTIDX=$3
+	local extra=${4:-2}
+	local OST=$(ostname_from_index $OSTIDX $dir)
+
+	# single-stripe files pinned to the target OST
+	mkdir_on_mdt -i $(facet_index $mfacet) $dir/${OST}
+	$LFS setstripe -i $OSTIDX -c 1 ${dir}/${OST}
+
+	# on the mdt's osc
+	local mdtosc_proc=$(get_mdtosc_proc_path $mfacet $OST)
+	local last_id=$(do_facet $mfacet $LCTL get_param -n \
+		osp.$mdtosc_proc.prealloc_last_id)
+	local next_id=$(do_facet $mfacet $LCTL get_param -n \
+		osp.$mdtosc_proc.prealloc_next_id)
+	echo "Creating to objid $last_id on ost $OST..."
+	createmany -o $dir/${OST}/f $next_id $((last_id - next_id + extra))
+}
+
+# Drain the precreated objects for one OST and leave a fail_loc armed on
+# it, so that subsequent precreate requests fail (simulated ENOSPC).
+# Arguments:
+#   $1 - OST index to exhaust
+#   $2 - fail_loc value to leave set on the OST facet when done
+#   $3 - fail_val for OBD_FAIL_OST_ENOSPC (default: the OST index)
+# Callers use exhaust_precreations()/exhaust_all_precreations(), which
+# add the required sleep_maxage afterwards.
+__exhaust_precreations() {
+	local OSTIDX=$1
+	local FAILLOC=$2
+	local FAILIDX=${3:-$OSTIDX}
+	local ofacet=ost$((OSTIDX + 1))
+
+	# work under $DIR/$tdir and target the MDS facet that serves it
+	mkdir_on_mdt0 $DIR/$tdir
+	local mdtidx=$($LFS getstripe -m $DIR/$tdir)
+	local mfacet=mds$((mdtidx + 1))
+	echo OSTIDX=$OSTIDX MDTIDX=$mdtidx
+
+	local mdtosc_proc=$(get_mdtosc_proc_path $mfacet)
+	do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc*
+
+#define OBD_FAIL_OST_ENOSPC 0x215
+	do_facet $ofacet $LCTL set_param fail_val=$FAILIDX fail_loc=0x215
+
+	consume_precreations $DIR/$tdir $mfacet $OSTIDX
+
+	do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc*
+	do_facet $ofacet $LCTL set_param fail_loc=$FAILLOC
+}
+
+exhaust_precreations() {
+ __exhaust_precreations $1 $2 $3
+ sleep_maxage
+}
+
+# Exhaust precreations on every OST, arming fail_loc $1 on each, then
+# wait one max_age interval so clients observe the new state.
+exhaust_all_precreations() {
+	local idx=0
+
+	while (( idx < OSTCOUNT )); do
+		__exhaust_precreations $idx $1 -1
+		idx=$((idx + 1))
+	done
+	sleep_maxage
+}
+
+# Force the MDT's OSP connection to one OST to open a new object
+# sequence, by toggling prealloc_force_new_seq around a full drain of
+# the precreate window.
+# Arguments:
+#   $1 - directory to create the drain files in
+#   $2 - MDS facet owning the OSP connection
+#   $3 - OST index
+force_new_seq_ost() {
+	local dir=$1
+	local mfacet=$2
+	local OSTIDX=$3
+	local OST=$(ostname_from_index $OSTIDX)
+	local mdtosc_proc=$(get_mdtosc_proc_path $mfacet $OST)
+
+	do_facet $mfacet $LCTL set_param \
+		osp.$mdtosc_proc.prealloc_force_new_seq=1
+	# consume preallocated objects, to wake up precreate thread
+	consume_precreations $dir $mfacet $OSTIDX
+	do_facet $mfacet $LCTL set_param \
+		osp.$mdtosc_proc.prealloc_force_new_seq=0
+}
+
+# Force new object sequences on every OST as seen from one MDT.
+# $1 - MDS facet (e.g. mds1)
+# The per-OST work is fanned out in the background; wait reaps all jobs
+# before the scratch directory is removed.
+force_new_seq() {
+	local mfacet=$1
+	local MDTIDX=$(facet_index $mfacet)
+	local MDT=$(mdtname_from_index $MDTIDX $DIR)
+	local i
+
+	# scratch directory pinned to the MDT under test
+	mkdir_on_mdt -i $MDTIDX $DIR/${MDT}
+	for (( i=0; i < OSTCOUNT; i++ )) ; do
+		force_new_seq_ost $DIR/${MDT} $mfacet $i &
+	done
+	wait
+	rm -rf $DIR/${MDT}
+}
+
+# Force new object sequences on every MDT in parallel, then wait one
+# max_age interval for the servers to settle.
+force_new_seq_all() {
+	local idx=0
+
+	while (( idx < MDSCOUNT )); do
+		force_new_seq mds$((idx + 1)) &
+		idx=$((idx + 1))
+	done
+	wait
+	sleep_maxage
+}
+
+ost_set_temp_seq_width_all() {
+ local osts=$(comma_list $(osts_nodes))
+ local width=$(do_facet ost1 $LCTL get_param -n seq.*OST0000-super.width)
+
+ do_nodes $osts $LCTL set_param seq.*OST*-super.width=$1
+ stack_trap "do_nodes $osts $LCTL set_param seq.*OST*-super.width=$width"
+}
+
+# Return 0 if python3 with the PyYAML module is available (a trivial
+# safe_load must succeed), non-zero otherwise.
+verify_yaml_available() {
+	python3 -c "import yaml; yaml.safe_load('''a: b''')"
+}
+
+# Validate that stdin parses as YAML; exit status is python3's.
+verify_yaml() {
+	python3 -c "import sys, yaml; obj = yaml.safe_load(sys.stdin)"
+}
+
+# Compare two YAML files semantically: return 0 iff files $1 and $2
+# parse to equal objects.
+verify_compare_yaml() {
+	python3 -c "import sys, yaml; f=open(\"$1\", \"r\"); obj1 = yaml.safe_load(f); f=open(\"$2\", \"r\"); obj2 = yaml.safe_load(f); sys.exit(obj1 != obj2)"
+}
+
+zfs_or_rotational() {
+ local ost_idx=0
+ local ost_name=$(ostname_from_index $ost_idx $MOUNT)
+ local param="get_param -n osd-*.${ost_name}.nonrotational"
+ local nonrotat=$(do_facet ost1 $LCTL $param)
+
+ if [[ -z "$nonrotat" ]]; then
+ # At this point there is no point moving ahead.
+ # Will stop here and dump all the info
+ set -x
+ local ost_name=$(ostname_from_index $ost_idx)
+ set +x
+ error "$LCTL $input_str"
+ fi
+
+ if [[ "$ost1_FSTYPE" == "zfs" ]] || (( "$nonrotat" == 0 )); then
+ return 0
+ else
+ return 1
+ fi
+}