X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=5bb370206ded051bf1de5c6dd553ef55af4ee540;hp=2385ef7d3333101763934a605c9d58743d6b0ed9;hb=HEAD;hpb=65e391e95b6d53b36840c95c2499f17fd84a1d36 diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 2385ef7..120f5a1 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -11,7 +11,6 @@ export VERBOSE=${VERBOSE:-false} export GSS=${GSS:-false} export GSS_SK=${GSS_SK:-false} export GSS_KRB5=false -export GSS_PIPEFS=false export SHARED_KEY=${SHARED_KEY:-false} export SK_PATH=${SK_PATH:-/tmp/test-framework-keys} export SK_OM_PATH=$SK_PATH'/tmp-request-mount' @@ -23,7 +22,8 @@ export SK_S2S=${SK_S2S:-false} export SK_S2SNM=${SK_S2SNM:-TestFrameNM} export SK_S2SNMCLI=${SK_S2SNMCLI:-TestFrameNMCli} export SK_SKIPFIRST=${SK_SKIPFIRST:-true} -export IDENTITY_UPCALL=default +# whether identity upcall is enabled (true), disabled (false), or default +export IDENTITY_UPCALL=${IDENTITY_UPCALL:-default} export QUOTA_AUTO=1 export FLAKEY=${FLAKEY:-true} # specify environment variable containing batch job name for server statistics @@ -36,6 +36,8 @@ export UMOUNT=${UMOUNT:-"umount -d"} export LSNAPSHOT_CONF="/etc/ldev.conf" export LSNAPSHOT_LOG="/var/log/lsnapshot.log" +export DATA_SEQ_MAX_WIDTH=0x1ffffff + # sles12 umount has a issue with -d option [ -e /etc/SuSE-release ] && grep -w VERSION /etc/SuSE-release | grep -wq 12 && { export UMOUNT="umount" @@ -53,9 +55,9 @@ LUSTRE_TESTS_CFG_DIR=${LUSTRE_TESTS_CFG_DIR:-${LUSTRE}/tests/cfg} EXCEPT_LIST_FILE=${EXCEPT_LIST_FILE:-${LUSTRE_TESTS_CFG_DIR}/tests-to-skip.sh} if [ -f "$EXCEPT_LIST_FILE" ]; then - echo "Reading test skip list from $EXCEPT_LIST_FILE" - cat $EXCEPT_LIST_FILE - . $EXCEPT_LIST_FILE + echo "Reading test skip list from $EXCEPT_LIST_FILE" + cat $EXCEPT_LIST_FILE + . $EXCEPT_LIST_FILE fi # check config files for options in decreasing order of preference @@ -70,6 +72,7 @@ sanitize_parameters() { for i in DIR DIR1 DIR2 MOUNT MOUNT1 MOUNT2 do local path=${!i} + if [ -d "$path" ]; then eval export $i=$(echo $path | sed -r 's/\/+$//g') fi @@ -88,10 +91,10 @@ assert_DIR () { } usage() { - echo "usage: $0 [-r] [-f cfgfile]" - echo " -r: reformat" + echo "usage: $0 [-r] [-f cfgfile]" + echo " -r: reformat" - exit + exit } print_summary () { @@ -103,50 +106,265 @@ print_summary () { printf "$form" "status" "script" "Total(sec)" "E(xcluded) S(low)" echo "---------------------------------------------------------------" - for O in $DEFAULT_SUITES; do - O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]") - [ "${!O}" = "no" ] && continue || true - local o=$(echo $O | tr "[:upper:]_" "[:lower:]-") - local log=${TMP}/${o}.log - if is_sanity_benchmark $o; then - log=${TMP}/sanity-benchmark.log - fi - local slow= - local skipped= - local total= - local status=Unfinished - if [ -f $log ]; then - skipped=$(grep excluded $log | awk '{ printf " %s", $3 }' | - sed 's/test_//g') - slow=$(egrep "^PASS|^FAIL" $log | tr -d "("| sed s/s\)$//g | - sort -nr -k 3 | head -n5 | awk '{ print $2":"$3"s" }') - total=$(grep duration $log | awk '{ print $2 }') - if [ "${!O}" = "done" ]; then - status=Done - fi - if $DDETAILS; then - local durations=$(egrep "^PASS|^FAIL" $log | - tr -d "("| sed s/s\)$//g | - awk '{ print $2":"$3"|" }') - details=$(printf "%s\n%s %s %s\n" "$details" \ - "DDETAILS" "$O" "$(echo $durations)") - fi - fi - printf "$form" $status "$O" "${total}" "E=$skipped" - printf "$form" "-" "-" "-" "S=$(echo $slow)" - done - - for O in $DEFAULT_SUITES; do - O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]") - if [ "${!O}" = "no" ]; then - printf "$form" "Skipped" "$O" "" - fi - done - - # print the detailed tests durations if DDETAILS=true - if $DDETAILS; then - echo "$details" - fi + for O in $DEFAULT_SUITES; do + O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]") + [ "${!O}" = "no" ] && continue || true + local o=$(echo $O | tr "[:upper:]_" "[:lower:]-") + local log=${TMP}/${o}.log + + if is_sanity_benchmark $o; then + log=${TMP}/sanity-benchmark.log + fi + local slow= + local skipped= + local total= + local status=Unfinished + + if [ -f $log ]; then + skipped=$(grep excluded $log | + awk '{ printf " %s", $3 }' | sed 's/test_//g') + slow=$(egrep "^PASS|^FAIL" $log | + tr -d "("| sed s/s\)$//g | sort -nr -k 3 | + head -n5 | awk '{ print $2":"$3"s" }') + total=$(grep duration $log | awk '{ print $2 }') + if [ "${!O}" = "done" ]; then + status=Done + fi + if $DDETAILS; then + local durations=$(egrep "^PASS|^FAIL" $log | + tr -d "("| sed s/s\)$//g | + awk '{ print $2":"$3"|" }') + details=$(printf "%s\n%s %s %s\n" "$details" \ + "DDETAILS" "$O" "$(echo $durations)") + fi + fi + printf "$form" $status "$O" "${total}" "E=$skipped" + printf "$form" "-" "-" "-" "S=$(echo $slow)" + done + + for O in $DEFAULT_SUITES; do + O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]") + if [ "${!O}" = "no" ]; then + printf "$form" "Skipped" "$O" "" + fi + done + + # print the detailed tests durations if DDETAILS=true + if $DDETAILS; then + echo "$details" + fi +} + +reset_lustre() { + if $do_reset; then + stopall + setupall + fi +} + +setup_if_needed() { + ! ${do_setup} && return + nfs_client_mode && return + AUSTER_CLEANUP=false + + local MOUNTED=$(mounted_lustre_filesystems) + + if $(echo $MOUNTED' ' | grep -w -q $MOUNT' '); then + check_config_clients $MOUNT + # init_facets_vars + # init_param_vars + return + fi + + echo "Lustre is not mounted, trying to do setup ... " + $reformat && CLEANUP_DM_DEV=true formatall + setupall + + MOUNTED=$(mounted_lustre_filesystems) + if ! $(echo $MOUNTED' ' | grep -w -q $MOUNT' '); then + echo "Lustre is not mounted after setup! " + exit 1 + fi + AUSTER_CLEANUP=true +} + +cleanup_if_needed() { + if $AUSTER_CLEANUP; then + cleanupall + fi +} + +find_script_in_path() { + target=$1 + path=$2 + for dir in $(tr : " " <<< $path); do + if [ -f $dir/$target ]; then + echo $dir/$target + return 0 + fi + if [ -f $dir/$target.sh ]; then + echo $dir/$target.sh + return 0 + fi + done + return 1 +} + +title() { + log "-----============= acceptance-small: "$*" ============----- `date`" +} + +doit() { + if $dry_run; then + printf "Would have run: %s\n" "$*" + return 0 + fi + if $verbose; then + printf "Running: %s\n" "$*" + fi + "$@" +} + + +run_suite() { + local suite_name=$1 + local suite_script=$2 + + title $suite_name + log_test $suite_name + + rm -f $TF_FAIL + touch $TF_SKIP + + local start_ts=$(date +%s) + + doit $script_lang $suite_script + + local rc=$? + local duration=$(($(date +%s) - $start_ts)) + local status="PASS" + + if [[ $rc -ne 0 || -f $TF_FAIL ]]; then + status="FAIL" + elif [[ -f $TF_SKIP ]]; then + status="SKIP" + fi + log_test_status $duration $status + [[ ! -f $TF_SKIP ]] || rm -f $TF_SKIP + + reset_lustre + + return $rc +} + +run_suite_logged() { + local suite_name=${1%.sh} + local suite=$(echo ${suite_name} | tr "[:lower:]-" "[:upper:]_") + + suite_script=$(find_script_in_path $suite_name $LUSTRE/tests) + + if [[ -z $suite_script ]]; then + echo "Can't find test script for $suite_name" + return 1 + fi + + echo "run_suite $suite_name $suite_script" + + local log_name=${suite_name}.suite_log.$(hostname -s).log + + if $verbose; then + run_suite $suite_name $suite_script 2>&1 |tee $LOGDIR/$log_name + else + run_suite $suite_name $suite_script > $LOGDIR/$log_name 2>&1 + fi + + return ${PIPESTATUS[0]} +} + +reset_logging() { + export LOGDIR=$1 + + unset YAML_LOG + init_logging +} + +split_commas() { + echo "${*//,/ }" +} + +run_suites() { + local n=0 + local argv=("$@") + + while ((n < repeat_count)); do + local RC=0 + local logdir=${test_logs_dir} + local first_suite=$FIRST_SUITE + + ((repeat_count > 1)) && logdir="$logdir/$n" + reset_logging $logdir + set -- "${argv[@]}" + while [[ -n $1 ]]; do + unset ONLY EXCEPT START_AT STOP_AT + local opts="" + local time_limit="" + + suite=$1 + shift; + while [[ -n $1 ]]; do + case "$1" in + --only) + shift; + export ONLY=$(split_commas $1) + + opts+="ONLY=$ONLY ";; + --suite) + shift; + export SUITE=$(split_commas $1) + + opts+="SUITE=$SUITE ";; + --pattern) + shift; + export PATTERN=$(split_commas $1) + + opts+="PATTERN=$PATTERN ";; + --except) + shift; + export EXCEPT=$(split_commas $1) + + opts+="EXCEPT=$EXCEPT ";; + --start-at) + shift; + export START_AT=$1 + + opts+="START_AT=$START_AT ";; + --stop-at) + shift; + export STOP_AT=$1 + + opts+="STOP_AT=$STOP_AT ";; + --time-limit) + shift; + time_limit=$1;; + *) + break;; + esac + shift + done + + # If first_suite not set or this is the first suite + if [ "x"$first_suite == "x" ] || [ $first_suite == $suite ]; then + echo "running: $suite $opts" + run_suite_logged $suite || RC=$? + unset first_suite + echo $suite returned $RC + fi + done + if $upload_logs; then + $upload_script $LOGDIR + fi + n=$((n + 1)) + done } # Get information about the Lustre environment. The information collected @@ -156,14 +374,26 @@ print_summary () { # output: No return values, environment variables are exported get_lustre_env() { - - export mds1_FSTYPE=${mds1_FSTYPE:-$(facet_fstype mds1)} - export ost1_FSTYPE=${ost1_FSTYPE:-$(facet_fstype ost1)} - - export MGS_VERSION=$(lustre_version_code mgs) - export MDS1_VERSION=$(lustre_version_code mds1) - export OST1_VERSION=$(lustre_version_code ost1) - export CLIENT_VERSION=$(lustre_version_code client) + if ! $RPC_MODE; then + export mds1_FSTYPE=${mds1_FSTYPE:-$(facet_fstype mds1)} + export ost1_FSTYPE=${ost1_FSTYPE:-$(facet_fstype ost1)} + + export MGS_VERSION=$(lustre_version_code mgs) + export MDS1_VERSION=$(lustre_version_code mds1) + export OST1_VERSION=$(lustre_version_code ost1) + export CLIENT_VERSION=$(lustre_version_code client) + + # import server-side version information into local variables + # so they can be used in tests instead of checked separately + # MGS_OS_VERSION_ID, MGS_OS_ID, MGS_OS_ID_LIKE, + # MDS1_OS_VERSION_ID, MDS1_OS_ID, MDS1_OS_ID_LIKE, + # OST1_OS_VERSION_ID, OST1_OS_ID, OST1_OS_ID_LIKE, + # CLIENT_OS_VERSION_ID, CLIENT_OS_ID, CLIENT_OS_ID_LIKE + lustre_os_release "eval export" mgs + lustre_os_release "eval export" mds1 + lustre_os_release "eval export" ost1 + lustre_os_release "eval export" client + fi # Prefer using "mds1" directly instead of SINGLEMDS. # Keep this for compat until it is removed from scripts. @@ -180,6 +410,9 @@ init_test_env() { export KEEP_ZPOOL=${KEEP_ZPOOL:-false} export CLEANUP_DM_DEV=false export PAGE_SIZE=$(get_page_size client) + export NAME=${NAME:-local} + + . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} export MKE2FS=$MKE2FS if [ -z "$MKE2FS" ]; then @@ -273,6 +506,8 @@ init_test_env() { fi export LST=${LST:-"$LUSTRE/../lnet/utils/lst"} [ ! -f "$LST" ] && export LST=$(which lst) + export LSTSH=${LSTSH:-"$LUSTRE/../lustre-iokit/lst-survey/lst.sh"} + [ ! -f "$LSTSH" ] && export LSTSH=$(which lst.sh) export SGPDDSURVEY=${SGPDDSURVEY:-"$LUSTRE/../lustre-iokit/sgpdd-survey/sgpdd-survey")} [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey) export MCREATE=${MCREATE:-mcreate} @@ -291,27 +526,33 @@ init_test_env() { if ! echo $PATH | grep -q $LUSTRE/tests/mpi; then export PATH=$LUSTRE/tests/mpi:$PATH fi - export RSYNC_RSH=${RSYNC_RSH:-rsh} + export LNETCTL=${LNETCTL:-"$LUSTRE/../lnet/utils/lnetctl"} + [ ! -f "$LNETCTL" ] && export LNETCTL=$(which lnetctl 2> /dev/null) export LCTL=${LCTL:-"$LUSTRE/utils/lctl"} [ ! -f "$LCTL" ] && export LCTL=$(which lctl) export LFS=${LFS:-"$LUSTRE/utils/lfs"} [ ! -f "$LFS" ] && export LFS=$(which lfs) + export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"} + [ ! -f "$KSOCKLND_CONFIG" ] && + export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null) - export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"} + export PERM_CMD=$(echo ${PERM_CMD:-"$LCTL conf_param"}) export L_GETIDENTITY=${L_GETIDENTITY:-"$LUSTRE/utils/l_getidentity"} if [ ! -f "$L_GETIDENTITY" ]; then - if `which l_getidentity > /dev/null 2>&1`; then + if $(which l_getidentity > /dev/null 2>&1); then export L_GETIDENTITY=$(which l_getidentity) else export L_GETIDENTITY=NONE fi fi export LL_DECODE_FILTER_FID=${LL_DECODE_FILTER_FID:-"$LUSTRE/utils/ll_decode_filter_fid"} - [ ! -f "$LL_DECODE_FILTER_FID" ] && export LL_DECODE_FILTER_FID="ll_decode_filter_fid" + [ ! -f "$LL_DECODE_FILTER_FID" ] && + export LL_DECODE_FILTER_FID="ll_decode_filter_fid" export LL_DECODE_LINKEA=${LL_DECODE_LINKEA:-"$LUSTRE/utils/ll_decode_linkea"} - [ ! -f "$LL_DECODE_LINKEA" ] && export LL_DECODE_LINKEA="ll_decode_linkea" + [ ! -f "$LL_DECODE_LINKEA" ] && + export LL_DECODE_LINKEA="ll_decode_linkea" export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"} [ ! -f "$MKFS" ] && export MKFS="mkfs.lustre" export TUNEFS=${TUNEFS:-"$LUSTRE/utils/tunefs.lustre"} @@ -334,10 +575,8 @@ init_test_env() { [ ! -f "$LSOM_SYNC" ] && export LSOM_SYNC=$(which llsom_sync 2> /dev/null) [ -z "$LSOM_SYNC" ] && export LSOM_SYNC="/usr/sbin/llsom_sync" - export NAME=${NAME:-local} - export LGSSD=${LGSSD:-"$LUSTRE/utils/gss/lgssd"} - [ "$GSS_PIPEFS" = "true" ] && [ ! -f "$LGSSD" ] && - export LGSSD=$(which lgssd) + export L_GETAUTH=${L_GETAUTH:-"$LUSTRE/utils/gss/l_getauth"} + [ ! -f "$L_GETAUTH" ] && export L_GETAUTH=$(which l_getauth 2> /dev/null) export LSVCGSSD=${LSVCGSSD:-"$LUSTRE/utils/gss/lsvcgssd"} [ ! -f "$LSVCGSSD" ] && export LSVCGSSD=$(which lsvcgssd 2> /dev/null) export KRB5DIR=${KRB5DIR:-"/usr/kerberos"} @@ -357,7 +596,11 @@ init_test_env() { if $SHARED_KEY; then $RPC_MODE || echo "Using GSS shared-key feature" - which lgss_sk > /dev/null 2>&1 || + [ -n "$LGSS_SK" ] || + export LGSS_SK=$(which lgss_sk 2> /dev/null) + [ -n "$LGSS_SK" ] || + export LGSS_SK="$LUSTRE/utils/gss/lgss_sk" + [ -n "$LGSS_SK" ] || error_exit "built with lgss_sk disabled! SEC=$SEC" GSS=true GSS_SK=true @@ -374,15 +617,6 @@ init_test_env() { ;; esac - case "x$IDUP" in - xtrue) - IDENTITY_UPCALL=true - ;; - xfalse) - IDENTITY_UPCALL=false - ;; - esac - export LOAD_MODULES_REMOTE=${LOAD_MODULES_REMOTE:-false} # Paths on remote nodes, if different @@ -426,14 +660,21 @@ init_test_env() { # Constants used in more than one test script export LOV_MAX_STRIPE_COUNT=2000 + export LMV_MAX_STRIPES_PER_MDT=5 + export DELETE_OLD_POOLS=${DELETE_OLD_POOLS:-false} + export KEEP_POOLS=${KEEP_POOLS:-false} + export PARALLEL=${PARALLEL:-"no"} + export BLCKSIZE=${BLCKSIZE:-4096} export MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} - . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} get_lustre_env # use localrecov to enable recovery for local clients, LU-12722 - [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] || + [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] || { export MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o localrecov"} + export MGS_MOUNT_OPTS=${MGS_MOUNT_OPTS:-"-o localrecov"} + } + [[ $OST1_VERSION -lt $(version_code 2.13.52) ]] || export OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o localrecov"} } @@ -456,7 +697,7 @@ check_cpt_number() { # code is useful for comparison two version strings to see which is newer. version_code() { # split arguments like "1.8.6-wc3" into "1", "8", "6", "3" - eval set -- $(tr "[:punct:][a-z]" " " <<< $*) + eval set -- $(tr "[:punct:][a-zA-Z]" " " <<< $*) echo -n $(((${1:-0}<<24) | (${2:-0}<<16) | (${3:-0}<<8) | (${4:-0}))) } @@ -527,10 +768,46 @@ lustre_version_code() { version_code $(lustre_build_version $1) } +# Extract the server-side /etc/os-release information into local variables +# usage: lustre_os_release +# generates $facet_OS_ID, $facet_OS_ID_LIKE, $facet_VERSION_ID +# and also $facet_OS_VERSION_CODE=$(version_code $facet_VERSION_ID) +lustre_os_release() { + local action=${1:-echo} + local facet=$2 + local FACET_OS=$(tr "[:lower:]" "[:upper:]" <<<$facet)_OS_ + + [[ "$action" == "echo" ]] && + echo "$facet: $(do_facet $facet "cat /etc/system-release")" + do_facet $facet "[[ -r /etc/os-release ]] || ls -s /etc/*release" 1>&2 + + while read LINE; do + case $LINE in + VERSION_ID=*|ID=*|ID_LIKE=*) $action ${FACET_OS}$LINE ;; + esac + done < <(do_facet $facet "cat /etc/os-release") + + [[ "$action" == "echo" ]] && return 0 + + local facet_version=${FACET_OS}VERSION + $action ${facet_version}_CODE=\$\(version_code \$${facet_version}_ID\) +} + module_loaded () { /sbin/lsmod | grep -q "^\<$1\>" } +check_lfs_df_ret_val() { + # Ignore only EOPNOTSUPP (which is 95; Operation not supported) error + # returned by 'lfs df' for valid dentry but not a lustrefs. + # + # 'lfs df' historically always returned success(0) instead of + # EOPNOTSUPP. This function for compatibility reason, ignores and + # masquerades EOPNOTSUPP as success. + [[ $1 -eq 95 ]] && return 0 + return $1 +} + PRLFS=false lustre_insmod() { local module=$1 @@ -655,29 +932,20 @@ load_module() { fi } -load_modules_local() { - if [ -n "$MODPROBE" ]; then - # use modprobe - echo "Using modprobe to load modules" - return 0 - fi - - # Create special udev test rules on every node - if [ -f $LUSTRE/lustre/conf/99-lustre.rules ]; then { - sed -e 's|/usr/sbin/lctl|$LCTL|g' $LUSTRE/lustre/conf/99-lustre.rules > /etc/udev/rules.d/99-lustre-test.rules - } else { - echo "SUBSYSTEM==\"lustre\", ACTION==\"change\", ENV{PARAM}==\"?*\", RUN+=\"$LCTL set_param '\$env{PARAM}=\$env{SETTING}'\"" > /etc/udev/rules.d/99-lustre-test.rules - } fi - udevadm control --reload-rules - udevadm trigger +do_lnetctl() { + $LCTL mark "$LNETCTL $*" + echo "$LNETCTL $*" + $LNETCTL "$@" +} +load_lnet() { # For kmemleak-enabled kernels we need clear all past state # that obviously has nothing to do with this Lustre run # Disable automatic memory scanning to avoid perf hit. if [ -f /sys/kernel/debug/kmemleak ] ; then - echo scan=off > /sys/kernel/debug/kmemleak - echo scan > /sys/kernel/debug/kmemleak - echo clear > /sys/kernel/debug/kmemleak + echo scan=off > /sys/kernel/debug/kmemleak || true + echo scan > /sys/kernel/debug/kmemleak || true + echo clear > /sys/kernel/debug/kmemleak || true fi echo Loading modules from $LUSTRE @@ -690,6 +958,7 @@ load_modules_local() { else ncpus=$(getconf _NPROCESSORS_CONF 2>/dev/null) local rc=$? + if [ $rc -eq 0 ]; then echo "detected $ncpus online CPUs by getconf" else @@ -703,6 +972,7 @@ load_modules_local() { # partitions. So we just force libcfs to create 2 partitions for # system with 2 or 4 cores local saved_opts="$MODOPTS_LIBCFS" + if [ $ncpus -le 4 ] && [ $ncpus -gt 1 ]; then # force to enable multiple CPU partitions echo "Force libcfs to create 2 CPU partitions" @@ -714,24 +984,58 @@ load_modules_local() { load_module ../libcfs/libcfs/libcfs # Prevent local MODOPTS_LIBCFS being passed as part of environment # variable to remote nodes - MODOPTS_LIBCFS=$saved_opts + unset MODOPTS_LIBCFS - set_default_debug - load_module ../lnet/lnet/lnet + set_default_debug "neterror net nettrace malloc" + if [ "$1" = "config_on_load=1" ]; then + load_module ../lnet/lnet/lnet + else + load_module ../lnet/lnet/lnet "$@" + fi LNDPATH=${LNDPATH:-"../lnet/klnds"} if [ -z "$LNETLND" ]; then case $NETTYPE in - o2ib*) LNETLND="o2iblnd/ko2iblnd" ;; - tcp*) LNETLND="socklnd/ksocklnd" ;; - *) local lnd="${NETTYPE%%[0-9]}lnd" + o2ib*) LNETLND="o2iblnd/ko2iblnd" ;; + tcp*) LNETLND="socklnd/ksocklnd" ;; + kfi*) LNETLND="kfilnd/kkfilnd" ;; + gni*) LNETLND="gnilnd/kgnilnd" ;; + *) local lnd="${NETTYPE%%[0-9]}lnd" [ -f "$LNDPATH/$lnd/k$lnd.ko" ] && LNETLND="$lnd/k$lnd" || LNETLND="socklnd/ksocklnd" esac fi load_module ../lnet/klnds/$LNETLND + + if [ "$1" = "config_on_load=1" ]; then + do_lnetctl lnet configure --all || + return $? + fi +} + +load_modules_local() { + if [ -n "$MODPROBE" ]; then + # use modprobe + echo "Using modprobe to load modules" + return 0 + fi + + # Create special udev test rules on every node + if [ -f $LUSTRE/lustre/conf/99-lustre.rules ]; then { + sed -e 's|/usr/sbin/lctl|$LCTL|g' $LUSTRE/lustre/conf/99-lustre.rules > /etc/udev/rules.d/99-lustre-test.rules + } else { + echo "SUBSYSTEM==\"lustre\", ACTION==\"change\", ENV{PARAM}==\"?*\", RUN+=\"$LCTL set_param '\$env{PARAM}=\$env{SETTING}'\"" > /etc/udev/rules.d/99-lustre-test.rules + } fi + udevadm control --reload-rules + udevadm trigger + + load_lnet + load_module obdclass/obdclass + if ! client_only; then + MODOPTS_PTLRPC=${MODOPTS_PTLRPC:-"lbug_on_grant_miscount=1"} + fi load_module ptlrpc/ptlrpc load_module ptlrpc/gss/ptlrpc_gss load_module fld/fld @@ -755,9 +1059,9 @@ load_modules_local() { load_module mgs/mgs load_module mdd/mdd load_module mdt/mdt - load_module ost/ost + # don't fail if ost module doesn't exist + load_module ost/ost 2>/dev/null || true; load_module lod/lod - load_module osp/osp load_module ofd/ofd load_module osp/osp fi @@ -779,7 +1083,7 @@ load_modules_local() { [ ! -f "$sbin_mount" ] && touch "$sbin_mount" if [ ! -s "$sbin_mount" -a -w "$sbin_mount" ]; then cat <<- EOF > "$sbin_mount" - #!/bin/sh + #!/bin/bash #STUB MARK echo "This $sbin_mount just a mountpoint." 1>&2 echo "It is never supposed to be run." 1>&2 @@ -795,12 +1099,24 @@ load_modules_local() { } load_modules () { + local facets + local facet + local failover load_modules_local # bug 19124 # load modules on remote nodes optionally # lustre-tests have to be installed on these nodes if $LOAD_MODULES_REMOTE; then local list=$(comma_list $(remote_nodes_list)) + + # include failover nodes in case they are not in the list yet + facets=$(get_facets) + for facet in ${facets//,/ }; do + failover=$(facet_failover_host $facet) + [ -n "$list" ] && [[ ! "$list" =~ "$failover" ]] && + list="$list,$failover" + done + if [ -n "$list" ]; then echo "loading modules on: '$list'" do_rpc_nodes "$list" load_modules_local @@ -810,20 +1126,20 @@ load_modules () { check_mem_leak () { LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) - LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" || true) + LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" || + true) if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then echo "$LEAK_LUSTRE" 1>&2 echo "$LEAK_PORTALS" 1>&2 mv $TMP/debug $TMP/debug-leak.`date +%s` || true echo "Memory leaks detected" - [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true + [ -n "$IGNORE_LEAK" ] && + { echo "ignoring leaks" && return 0; } || true return 1 fi } -unload_modules() { - wait_exit_ST client # bug 12845 - +unload_modules_local() { $LUSTRE_RMMOD ldiskfs || return 2 [ -f /etc/udev/rules.d/99-lustre-test.rules ] && @@ -831,15 +1147,23 @@ unload_modules() { udevadm control --reload-rules udevadm trigger + check_mem_leak || return 254 + + return 0 +} + +unload_modules() { + local rc=0 + + wait_exit_ST client # bug 12845 + + unload_modules_local || rc=$? + if $LOAD_MODULES_REMOTE; then local list=$(comma_list $(remote_nodes_list)) if [ -n "$list" ]; then echo "unloading modules on: '$list'" - do_rpc_nodes "$list" $LUSTRE_RMMOD ldiskfs - do_rpc_nodes "$list" check_mem_leak - do_rpc_nodes "$list" "rm -f /etc/udev/rules.d/99-lustre-test.rules" - do_rpc_nodes "$list" "udevadm control --reload-rules" - do_rpc_nodes "$list" "udevadm trigger" + do_rpc_nodes "$list" unload_modules_local fi fi @@ -850,25 +1174,26 @@ unload_modules() { rm -f $sbin_mount fi - check_mem_leak || return 254 + [[ $rc -eq 0 ]] && echo "modules unloaded." - echo "modules unloaded." - return 0 + return $rc } fs_log_size() { - local facet=${1:-$SINGLEMDS} + local facet=${1:-ost1} local size=0 + local mult=$OSTCOUNT case $(facet_fstype $facet) in - ldiskfs) size=50;; # largest seen is 44, leave some headroom + ldiskfs) size=32;; # largest seen is 64 with multiple OSTs # grant_block_size is in bytes, allow at least 2x max blocksize zfs) size=$(lctl get_param osc.$FSNAME*.import | awk '/grant_block_size:/ {print $2/512; exit;}') ;; esac - echo -n $((size * MDSCOUNT)) + [[ $facet =~ mds ]] && mult=$MDTCOUNT + echo -n $((size * mult)) } fs_inode_ksize() { @@ -883,44 +1208,84 @@ fs_inode_ksize() { echo -n $size } +runas_su() { + local user=$1 + local cmd=$2 + shift 2 + local opts="$*" + + if $VERBOSE; then + echo Running as $user: $cmd $opts + fi + cmd=$(which $cmd) + su - $user -c "$cmd $opts" +} + check_gss_daemon_nodes() { - local list=$1 - dname=$2 + local list=$1 + local dname=$2 + local loopmax=10 + local loop + local node + local ret - do_nodesv $list "num=\\\$(ps -o cmd -C $dname | grep $dname | wc -l); + dname=$(basename "$dname" | awk '{print $1}') + do_nodesv $list "num=0; +for proc in \\\$(pgrep $dname); do +[ \\\$(ps -o ppid= -p \\\$proc) -ne 1 ] || ((num++)) +done; if [ \\\"\\\$num\\\" -ne 1 ]; then echo \\\$num instance of $dname; exit 1; fi; " + ret=$? + (( $ret == 0 )) || return $ret + + for node in ${list//,/ }; do + loop=0 + while (( $loop < $loopmax )); do + do_nodesv $node "$L_GETAUTH -d" + ret=$? + (( $ret == 0 )) && break + loop=$((loop + 1)) + sleep 5 + done + (( $loop < $loopmax )) || return 1 + done + return 0 } check_gss_daemon_facet() { - facet=$1 - dname=$2 + local facet=$1 + local dname=$2 - num=`do_facet $facet ps -o cmd -C $dname | grep $dname | wc -l` - if [ $num -ne 1 ]; then - echo "$num instance of $dname on $facet" - return 1 - fi - return 0 + dname=$(basename "$dname" | awk '{print $1}') + num=$(do_facet $facet ps -o cmd -C $dname | grep $dname | wc -l) + if [ $num -ne 1 ]; then + echo "$num instance of $dname on $facet" + return 1 + fi + return 0 } send_sigint() { - local list=$1 - shift - echo Stopping $@ on $list - do_nodes $list "killall -2 $@ 2>/dev/null || true" + local list=$1 + + shift + echo "Stopping "$@" on $list" + do_nodes $list "killall -2 $* 2>/dev/null || true" } # start gss daemons on all nodes, or "daemon" on "nodes" if set start_gss_daemons() { local nodes=$1 local daemon=$2 + local options=$3 if [ "$nodes" ] && [ "$daemon" ] ; then echo "Starting gss daemon on nodes: $nodes" - do_nodes $nodes "$daemon" || return 8 + do_nodes $nodes "$daemon" "$options" || return 8 + check_gss_daemon_nodes $nodes "$daemon" || return 9 return 0 fi @@ -928,45 +1293,30 @@ start_gss_daemons() { echo "Starting gss daemon on mds: $nodes" if $GSS_SK; then # Start all versions, in case of switching - do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z" || return 1 + do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z $options" || + return 1 else - do_nodes $nodes "$LSVCGSSD -v" || return 1 - fi - if $GSS_PIPEFS; then - do_nodes $nodes "$LGSSD -v" || return 2 + do_nodes $nodes "$LSVCGSSD -vvv $options" || return 1 fi nodes=$(comma_list $(osts_nodes)) echo "Starting gss daemon on ost: $nodes" if $GSS_SK; then # Start all versions, in case of switching - do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z" || return 3 + do_nodes $nodes "$LSVCGSSD -vvv -s -m -o -z $options" || + return 3 else - do_nodes $nodes "$LSVCGSSD -v" || return 3 + do_nodes $nodes "$LSVCGSSD -vvv $options" || return 3 fi # starting on clients local clients=${CLIENTS:-$HOSTNAME} - if $GSS_PIPEFS; then - echo "Starting $LGSSD on clients $clients " - do_nodes $clients "$LGSSD -v" || return 4 - fi - - # wait daemons entering "stable" status - sleep 5 # # check daemons are running # nodes=$(comma_list $(mdts_nodes) $(osts_nodes)) - check_gss_daemon_nodes $nodes lsvcgssd || return 5 - if $GSS_PIPEFS; then - nodes=$(comma_list $(mdts_nodes)) - check_gss_daemon_nodes $nodes lgssd || return 6 - fi - if $GSS_PIPEFS; then - check_gss_daemon_nodes $clients lgssd || return 7 - fi + check_gss_daemon_nodes $nodes "$LSVCGSSD" || return 5 } stop_gss_daemons() { @@ -984,6 +1334,7 @@ stop_gss_daemons() { add_sk_mntflag() { # Add mount flags for shared key local mt_opts=$@ + if grep -q skpath <<< "$mt_opts" ; then mt_opts=$(echo $mt_opts | sed -e "s#skpath=[^ ,]*#skpath=$SK_PATH#") @@ -1036,7 +1387,7 @@ init_gss() { if $GSS_SK && ! $SK_NO_KEY; then echo "Loading basic SSK keys on all servers" do_nodes $(comma_list $(all_server_nodes)) \ - "lgss_sk -t server -l $SK_PATH/$FSNAME.key || true" + "$LGSS_SK -t server -l $SK_PATH/$FSNAME.key || true" do_nodes $(comma_list $(all_server_nodes)) \ "keyctl show | grep lustre | cut -c1-11 | sed -e 's/ //g;' | @@ -1094,19 +1445,19 @@ init_gss() { # and S2S now requires keys as well, both for "client" # and for "server" if $SK_S2S; then - lgss_sk -t server -f$FSNAME -n $SK_S2SNMCLI \ + $LGSS_SK -t server -f$FSNAME -n $SK_S2SNMCLI \ -w $SK_PATH/$FSNAME-nmclient.key \ -d /dev/urandom >/dev/null 2>&1 - lgss_sk -t mgs,server -f$FSNAME -n $SK_S2SNM \ + $LGSS_SK -t mgs,server -f$FSNAME -n $SK_S2SNM \ -w $SK_PATH/$FSNAME-s2s-server.key \ -d /dev/urandom >/dev/null 2>&1 fi # basic key create - lgss_sk -t server -f$FSNAME -w $SK_PATH/$FSNAME.key \ + $LGSS_SK -t server -f$FSNAME -w $SK_PATH/$FSNAME.key \ -d /dev/urandom >/dev/null 2>&1 # per-nodemap keys for i in $(seq 0 $((numclients - 1))); do - lgss_sk -t server -f$FSNAME -n c$i \ + $LGSS_SK -t server -f$FSNAME -n c$i \ -w $SK_PATH/nodemap/c$i.key -d /dev/urandom \ >/dev/null 2>&1 done @@ -1118,27 +1469,34 @@ init_gss() { fi # Set client keys to client type to generate prime P if local_mode; then - do_nodes $(all_nodes) "lgss_sk -t client,server -m \ + do_nodes $(all_nodes) "$LGSS_SK -t client,server -m \ $SK_PATH/$FSNAME.key >/dev/null 2>&1" else - do_nodes $clients "lgss_sk -t client -m \ + do_nodes $clients "$LGSS_SK -t client -m \ $SK_PATH/$FSNAME.key >/dev/null 2>&1" - do_nodes $clients "find $SK_PATH/nodemap -name \*.key | \ - xargs -IX lgss_sk -t client -m X >/dev/null 2>&1" + do_nodes $clients "find $SK_PATH/nodemap \ + -name \*.key | xargs -IX $LGSS_SK -t client \ + -m X >/dev/null 2>&1" + # also have a client key available on server side, + # for local client mount + do_nodes $(comma_list $(all_server_nodes)) \ + "cp $SK_PATH/$FSNAME.key $SK_PATH/${FSNAME}_cli.key && \ + $LGSS_SK -t client -m \ + $SK_PATH/${FSNAME}_cli.key >/dev/null 2>&1" fi # This is required for servers as well, if S2S in use if $SK_S2S; then do_nodes $(comma_list $(mdts_nodes)) \ "cp $SK_PATH/$FSNAME-s2s-server.key \ - $SK_PATH/$FSNAME-s2s-client.key; lgss_sk \ + $SK_PATH/$FSNAME-s2s-client.key; $LGSS_SK \ -t client -m $SK_PATH/$FSNAME-s2s-client.key \ >/dev/null 2>&1" do_nodes $(comma_list $(osts_nodes)) \ "cp $SK_PATH/$FSNAME-s2s-server.key \ - $SK_PATH/$FSNAME-s2s-client.key; lgss_sk \ + $SK_PATH/$FSNAME-s2s-client.key; $LGSS_SK \ -t client -m $SK_PATH/$FSNAME-s2s-client.key \ >/dev/null 2>&1" - do_nodes $clients "lgss_sk -t client \ + do_nodes $clients "$LGSS_SK -t client \ -m $SK_PATH/$FSNAME-nmclient.key \ >/dev/null 2>&1" fi @@ -1164,6 +1522,9 @@ init_gss() { lctl set_param -n \ sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG" fi + + do_nodesv $(comma_list $(all_server_nodes)) \ + "$LCTL set_param sptlrpc.gss.rsi_upcall=$L_GETAUTH" } cleanup_gss() { @@ -1262,8 +1623,8 @@ node_fstypes() { local facet for facet in ${facets//,/ }; do - if [ $node == $(facet_host $facet) ] || - [ $node == "$(facet_failover_host $facet)" ]; then + if [[ $node == $(facet_host $facet) ]] || + [[ $node == "$(facet_failover_host $facet)" ]]; then fstype=$(facet_fstype $facet) if [[ $fstypes != *$fstype* ]]; then fstypes+="${fstypes:+,}$fstype" @@ -1311,20 +1672,6 @@ devicelabel() { echo -n $label } -mdsdevlabel() { - local num=$1 - local device=$(mdsdevname $num) - local label=$(devicelabel mds$num ${device} | grep -v "CMD: ") - echo -n $label -} - -ostdevlabel() { - local num=$1 - local device=$(ostdevname $num) - local label=$(devicelabel ost$num ${device} | grep -v "CMD: ") - echo -n $label -} - # # Get the device of a facet. # @@ -1374,7 +1721,15 @@ running_in_vm() { case $virt in VMware|KVM|VirtualBox|Parallels|Bochs) - echo $virt | tr '[A-Z]' '[a-z]' ;; + echo $virt | tr '[A-Z]' '[a-z]' && return;; + + *) ;; + esac + + virt=$(dmidecode -s system-manufacturer | awk '{print $1}') + case $virt in + QEMU) + echo $virt | tr '[A-Z]' '[a-z]' && return;; *) ;; esac } @@ -1557,8 +1912,7 @@ get_osd_param() { local device=${2:-$FSNAME-OST*} local name=$3 - do_nodes $nodes "$LCTL get_param -n obdfilter.$device.$name \ - osd-*.$device.$name 2>&1" | grep -v 'error:' + do_nodes $nodes "$LCTL get_param -n osd-*.$device.$name" } set_osd_param() { @@ -1567,25 +1921,7 @@ set_osd_param() { local name=$3 local value=$4 - do_nodes $nodes "$LCTL set_param -n obdfilter.$device.$name=$value \ - osd-*.$device.$name=$value 2>&1" | grep -v 'error:' -} - -set_debug_size () { - local dz=${1:-$DEBUG_SIZE} - - if [ -f /sys/devices/system/cpu/possible ]; then - local cpus=$(($(cut -d "-" -f 2 /sys/devices/system/cpu/possible)+1)) - else - local cpus=$(getconf _NPROCESSORS_CONF 2>/dev/null) - fi - - # bug 19944, adjust size to be -gt num_possible_cpus() - # promise 2MB for every cpu at least - if [ -n "$cpus" ] && [ $((cpus * 2)) -gt $dz ]; then - dz=$((cpus * 2)) - fi - lctl set_param debug_mb=$dz + do_nodes $nodes "$LCTL set_param -n osd-*.$device.$name=$value" } set_default_debug () { @@ -1594,9 +1930,12 @@ set_default_debug () { local debug_size=${3:-$DEBUG_SIZE} [ -n "$debug" ] && lctl set_param debug="$debug" >/dev/null - [ -n "$subsys" ] && lctl set_param subsystem_debug="${subsys# }" >/dev/null + [ -n "$subsys" ] && + lctl set_param subsystem_debug="${subsys# }" >/dev/null + [ -n "$debug_size" ] && + lctl set_param debug_mb="$debug_size" >/dev/null - [ -n "$debug_size" ] && set_debug_size $debug_size > /dev/null + return 0 } set_default_debug_nodes () { @@ -1627,38 +1966,67 @@ set_default_debug_facet () { set_default_debug_nodes $node "$debug" "$subsys" $debug_size } -set_params_nodes () { - [[ $# -ge 2 ]] || return 0 - +set_params_nodes() { local nodes=$1 shift - do_nodes $nodes $LCTL set_param $@ + local params="$@" + + [[ -n "$params" ]] || return 0 + + do_nodes $nodes "$LCTL set_param $params" } -set_params_clients () { +set_params_clients() { + (( $# >= 2 )) || return 0 local clients=${1:-$CLIENTS} - local params=${2:-$CLIENT_LCTL_SETPARAM_PARAM} + shift + local params="${@:-$CLIENT_LCTL_SETPARAM_PARAM}" - [[ -n $params ]] || return 0 set_params_nodes $clients $params } +set_params_mdts() { + (( $# >= 2 )) || return 0 + local mdts=${1:-$(comma_list $(mdts_nodes))} + shift + local params="${@:-$MDS_LCTL_SETPARAM_PARAM}" + + set_params_nodes $mdts $params +} + +set_params_osts() { + (( $# >= 2 )) || return 0 + local osts=${1:-$(comma_list $(osts_nodes))} + shift + local params="${@:-$OSS_LCTL_SETPARAM_PARAM}" + + set_params_nodes $osts $params +} + set_hostid () { - local hostid=${1:-$(hostid)} + local hostid=${1:-$(hostid)} - if [ ! -s /etc/hostid ]; then - printf $(echo -n $hostid | + if [ ! -s /etc/hostid ]; then + printf $(echo -n $hostid | sed 's/\(..\)\(..\)\(..\)\(..\)/\\x\4\\x\3\\x\2\\x\1/') >/etc/hostid - fi + fi } # Facet functions mount_facets () { local facets=${1:-$(get_facets)} local facet + local -a mountpids + local total=0 + local ret=0 for facet in ${facets//,/ }; do - mount_facet $facet + mount_facet $facet & + mountpids[total]=$! + total=$((total+1)) + done + for ((index=0; index<$total; index++)); do + wait ${mountpids[index]} local RC=$? [ $RC -eq 0 ] && continue @@ -1668,8 +2036,9 @@ mount_facets () { else error "Restart of $facet failed!" fi - return $RC + ret=$RC done + return $ret } # @@ -2037,6 +2406,9 @@ mount_facet() { local devicelabel local dm_dev=${!dev} + [[ $dev == "mgsfailover_dev" ]] && combined_mgs_mds && + dev=mds1failover_dev + module_loaded lustre || load_modules case $fstype in @@ -2064,9 +2436,15 @@ mount_facet() { if [ -f $TMP/test-lu482-trigger ]; then RC=2 else + local seq_width=$(($OSTSEQWIDTH / $OSTCOUNT)) + (( $seq_width >= 16384 )) || seq_width=16384 do_facet ${facet} \ "mkdir -p $mntpt; $MOUNT_CMD $opts $dm_dev $mntpt" RC=${PIPESTATUS[0]} + if [[ ${facet} =~ ost ]]; then + do_facet ${facet} "$LCTL set_param \ + seq.cli-$(devicelabel $facet $dm_dev)-super.width=$seq_width" + fi fi if [ $RC -ne 0 ]; then @@ -2127,13 +2505,19 @@ start() { local dev_alias=$(facet_device_alias $facet) eval export ${dev_alias}_dev=${device} - eval export ${facet}_opt=\"$@\" + eval export ${facet}_opt=\"$*\" + + combined_mgs_mds && [[ ${dev_alias} == mds1 ]] && + eval export mgs_dev=${device} local varname=${dev_alias}failover_dev if [ -n "${!varname}" ] ; then eval export ${dev_alias}failover_dev=${!varname} else eval export ${dev_alias}failover_dev=$device + combined_mgs_mds && [[ ${dev_alias} == mds1 ]] && + eval export mgsfailover_dev=${device} + fi local mntpt=$(facet_mntpt $facet) @@ -2155,8 +2539,8 @@ stop() { local mntpt=$(facet_mntpt $facet) running=$(do_facet ${facet} "grep -c $mntpt' ' /proc/mounts || true") if [ ${running} -ne 0 ]; then - echo "Stopping $mntpt (opts:$@) on $HOST" - do_facet ${facet} $UMOUNT $@ $mntpt + echo "Stopping $mntpt (opts:$*) on $HOST" + do_facet ${facet} $UMOUNT "$@" $mntpt fi # umount should block, but we should wait for unrelated obd's @@ -2193,20 +2577,32 @@ ost_quota_type() { # restore old quota type settings restore_quota() { - if [ "$old_MDT_QUOTA_TYPE" ]; then - if [[ $PERM_CMD == *"set_param -P"* ]]; then - do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-MDT*.quota_slave.enable = \ - $old_MDT_QUOTA_TYPE - else - do_facet mgs $PERM_CMD \ + for usr in $QUOTA_USERS; do + echo "Setting up quota on $HOSTNAME:$MOUNT for $usr..." + for type in u g; do + cmd="$LFS setquota -$type $usr -b 0" + cmd="$cmd -B 0 -i 0 -I 0 $MOUNT" + echo "+ $cmd" + eval $cmd || error "$cmd FAILED!" + done + # display the quota status + echo "Quota settings for $usr : " + $LFS quota -v -u $usr $MOUNT || true + done + if [ "$old_MDT_QUOTA_TYPE" ]; then + if [[ $PERM_CMD == *"set_param -P"* ]]; then + do_facet mgs $PERM_CMD \ + osd-*.$FSNAME-MDT*.quota_slave.enabled = \ + $old_MDT_QUOTA_TYPE + else + do_facet mgs $PERM_CMD \ $FSNAME.quota.mdt=$old_MDT_QUOTA_TYPE fi fi if [ "$old_OST_QUOTA_TYPE" ]; then if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-OST*.quota_slave.enable = \ + osd-*.$FSNAME-OST*.quota_slave.enabled = \ $old_OST_QUOTA_TYPE else do_facet mgs $LCTL conf_param \ @@ -2220,6 +2616,7 @@ restore_quota() { # This will allow fixing the "lfs df" summary line in the future. lfs_df() { $LFS df $* | sed -e 's/filesystem /filesystem_/' + check_lfs_df_ret_val ${PIPESTATUS[0]} } # Get free inodes on the MDT specified by mdt index, free indoes on @@ -2267,9 +2664,9 @@ setup_quota(){ if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-MDT*.quota_slave.enable=$QUOTA_TYPE + osd-*.$FSNAME-MDT*.quota_slave.enabled=$QUOTA_TYPE do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-OST*.quota_slave.enable=$QUOTA_TYPE + osd-*.$FSNAME-OST*.quota_slave.enabled=$QUOTA_TYPE else do_facet mgs $PERM_CMD $FSNAME.quota.mdt=$QUOTA_TYPE || error "set mdt quota type failed" @@ -2390,75 +2787,75 @@ zconf_umount() { # Mount the file system on the MDS mount_mds_client() { - local mds_HOST=${SINGLEMDS}_HOST - echo $mds_HOST - zconf_mount $mds1_HOST $MOUNT2 $MOUNT_OPTS || - error "unable to mount $MOUNT2 on MDS" + local host=$(facet_active_host $SINGLEMDS) + echo $host + zconf_mount $host $MOUNT2 $MOUNT_OPTS || + error "unable to mount $MOUNT2 on $host" } # Unmount the file system on the MDS umount_mds_client() { - local mds_HOST=${SINGLEMDS}_HOST - zconf_umount $mds1_HOST $MOUNT2 + local host=$(facet_active_host $SINGLEMDS) + zconf_umount $host $MOUNT2 do_facet $SINGLEMDS "rmdir $MOUNT2" } # nodes is comma list sanity_mount_check_nodes () { - local nodes=$1 - shift - local mnts="$@" - local mnt + local nodes=$1 + shift + local mnts="$@" + local mnt - # FIXME: assume that all cluster nodes run the same os - [ "$(uname)" = Linux ] || return 0 + # FIXME: assume that all cluster nodes run the same os + [ "$(uname)" = Linux ] || return 0 - local rc=0 - for mnt in $mnts ; do - do_nodes $nodes "running=\\\$(grep -c $mnt' ' /proc/mounts); + local rc=0 + for mnt in $mnts ; do + do_nodes $nodes "running=\\\$(grep -c $mnt' ' /proc/mounts); mpts=\\\$(mount | grep -c $mnt' '); if [ \\\$running -ne \\\$mpts ]; then echo \\\$(hostname) env are INSANE!; exit 1; fi" - [ $? -eq 0 ] || rc=1 - done - return $rc + [ $? -eq 0 ] || rc=1 + done + return $rc } sanity_mount_check_servers () { - [ -n "$CLIENTONLY" ] && - { echo "CLIENTONLY mode, skip mount_check_servers"; return 0; } || true - echo Checking servers environments - - # FIXME: modify get_facets to display all facets wo params - local facets="$(get_facets OST),$(get_facets MDS),mgs" - local node - local mntpt - local facet - for facet in ${facets//,/ }; do - node=$(facet_host ${facet}) - mntpt=$(facet_mntpt $facet) - sanity_mount_check_nodes $node $mntpt || - { error "server $node environments are insane!"; return 1; } - done + [ -n "$CLIENTONLY" ] && + { echo "CLIENTONLY mode, skip mount_check_servers"; return 0; } || true + echo Checking servers environments + + # FIXME: modify get_facets to display all facets wo params + local facets="$(get_facets OST),$(get_facets MDS),mgs" + local node + local mntpt + local facet + for facet in ${facets//,/ }; do + node=$(facet_host ${facet}) + mntpt=$(facet_mntpt $facet) + sanity_mount_check_nodes $node $mntpt || + { error "server $node environments are insane!"; return 1; } + done } sanity_mount_check_clients () { - local clients=${1:-$CLIENTS} - local mntpt=${2:-$MOUNT} - local mntpt2=${3:-$MOUNT2} + local clients=${1:-$CLIENTS} + local mntpt=${2:-$MOUNT} + local mntpt2=${3:-$MOUNT2} - [ -z $clients ] && clients=$(hostname) - echo Checking clients $clients environments + [ -z $clients ] && clients=$(hostname) + echo Checking clients $clients environments - sanity_mount_check_nodes $clients $mntpt $mntpt2 || - error "clients environments are insane!" + sanity_mount_check_nodes $clients $mntpt $mntpt2 || + error "clients environments are insane!" } sanity_mount_check () { - sanity_mount_check_servers || return 1 - sanity_mount_check_clients || return 2 + sanity_mount_check_servers || return 1 + sanity_mount_check_clients || return 2 } # mount clients if not mouted @@ -2483,8 +2880,8 @@ zconf_mount_clients() { local i=0 # Mount all server nodes first with per-NM keys for nmclient in ${clients//,/ }; do -# do_nodes $(comma_list $(all_server_nodes)) "lgss_sk -t server -l $SK_PATH/nodemap/c$i.key -n c$i" - do_nodes $(comma_list $(all_server_nodes)) "lgss_sk -t server -l $SK_PATH/nodemap/c$i.key" + do_nodes $(comma_list $(all_server_nodes)) \ + "$LGSS_SK -t server -l $SK_PATH/nodemap/c$i.key" i=$((i + 1)) done # set perms for per-nodemap keys else permission denied @@ -2500,7 +2897,7 @@ zconf_mount_clients() { fi do_node $nmclient "! grep -q $mnt' ' \ /proc/mounts || umount $mnt" - local prunedopts=$(add_sk_mntflag $prunedopts); + local prunedopts=$(add_sk_mntflag $opts); prunedopts=$(echo $prunedopts | sed -e \ "s#skpath=[^ ^,]*#skpath=$mountkey#g") set -x @@ -2588,14 +2985,14 @@ exit \\\$rc" || return ${PIPESTATUS[0]} } zconf_umount_clients() { - local clients=$1 - local mnt=$2 - local force + local clients=$1 + local mnt=$2 + local force - [ "$3" ] && force=-f + [ "$3" ] && force=-f - echo "Stopping clients: $clients $mnt (opts:$force)" - do_nodes $clients "running=\\\$(grep -c $mnt' ' /proc/mounts); + echo "Stopping clients: $clients $mnt (opts:$force)" + do_nodes $clients "running=\\\$(grep -c $mnt' ' /proc/mounts); if [ \\\$running -ne 0 ] ; then echo Stopping client \\\$(hostname) $mnt opts:$force; lsof $mnt || need_kill=no; @@ -2612,51 +3009,53 @@ fi" } shutdown_node () { - local node=$1 - echo + $POWER_DOWN $node - $POWER_DOWN $node + local node=$1 + + echo + $POWER_DOWN $node + $POWER_DOWN $node } shutdown_node_hard () { - local host=$1 - local attempts=$SHUTDOWN_ATTEMPTS + local host=$1 + local attempts=$SHUTDOWN_ATTEMPTS - for i in $(seq $attempts) ; do - shutdown_node $host - sleep 1 - wait_for_function --quiet "! ping -w 3 -c 1 $host" 5 1 && return 0 - echo "waiting for $host to fail attempts=$attempts" - [ $i -lt $attempts ] || \ - { echo "$host still pingable after power down! attempts=$attempts" && return 1; } - done + for i in $(seq $attempts) ; do + shutdown_node $host + sleep 1 + wait_for_function --quiet "! ping -w 3 -c 1 $host" 5 1 && + return 0 + echo "waiting for $host to fail attempts=$attempts" + [ $i -lt $attempts ] || + { echo "$host still pingable after power down! attempts=$attempts" && return 1; } + done } shutdown_client() { - local client=$1 - local mnt=${2:-$MOUNT} - local attempts=3 - - if [ "$FAILURE_MODE" = HARD ]; then - shutdown_node_hard $client - else - zconf_umount_clients $client $mnt -f - fi + local client=$1 + local mnt=${2:-$MOUNT} + local attempts=3 + + if [ "$FAILURE_MODE" = HARD ]; then + shutdown_node_hard $client + else + zconf_umount_clients $client $mnt -f + fi } facets_on_host () { - local host=$1 - local facets="$(get_facets OST),$(get_facets MDS)" - local affected + local affected + local host=$1 + local facets="$(get_facets OST),$(get_facets MDS)" - combined_mgs_mds || facets="$facets,mgs" + combined_mgs_mds || facets="$facets,mgs" - for facet in ${facets//,/ }; do - if [ $(facet_active_host $facet) == $host ]; then - affected="$affected $facet" - fi - done + for facet in ${facets//,/ }; do + if [ $(facet_active_host $facet) == $host ]; then + affected="$affected $facet" + fi + done - echo $(comma_list $affected) + echo $(comma_list $affected) } facet_up() { @@ -2668,17 +3067,17 @@ facet_up() { } facets_up_on_host () { - local host=$1 - local facets=$(facets_on_host $host) - local affected_up + local affected_up + local host=$1 + local facets=$(facets_on_host $host) - for facet in ${facets//,/ }; do - if $(facet_up $facet $host); then - affected_up="$affected_up $facet" - fi - done + for facet in ${facets//,/ }; do + if $(facet_up $facet $host); then + affected_up="$affected_up $facet" + fi + done - echo $(comma_list $affected_up) + echo $(comma_list $affected_up) } shutdown_facet() { @@ -2702,26 +3101,28 @@ shutdown_facet() { } reboot_node() { - local node=$1 - echo + $POWER_UP $node - $POWER_UP $node + local node=$1 + + echo + $POWER_UP $node + $POWER_UP $node } remount_facet() { - local facet=$1 + local facet=$1 - stop $facet - mount_facet $facet + stop $facet + mount_facet $facet } reboot_facet() { local facet=$1 local node=$(facet_active_host $facet) + local sleep_time=${2:-10} if [ "$FAILURE_MODE" = HARD ]; then boot_node $node else - sleep 10 + sleep $sleep_time fi } @@ -2739,34 +3140,34 @@ boot_node() { } facets_hosts () { - local facets=$1 - local hosts + local hosts + local facets=$1 - for facet in ${facets//,/ }; do - hosts=$(expand_list $hosts $(facet_host $facet) ) - done + for facet in ${facets//,/ }; do + hosts=$(expand_list $hosts $(facet_host $facet) ) + done - echo $hosts + echo $hosts } _check_progs_installed () { - local progs=$@ - local rc=0 + local progs=$@ + local rc=0 - for prog in $progs; do - if ! [ "$(which $prog)" -o "${!prog}" ]; then - echo $prog missing on $(hostname) - rc=1 - fi - done - return $rc + for prog in $progs; do + if ! [ "$(which $prog)" -o "${!prog}" ]; then + echo $prog missing on $(hostname) + rc=1 + fi + done + return $rc } check_progs_installed () { local nodes=$1 shift - do_rpc_nodes "$nodes" _check_progs_installed $@ + do_rpc_nodes "$nodes" _check_progs_installed "$@" } # recovery-scale functions @@ -2883,141 +3284,115 @@ check_client_load () { return $RC } check_client_loads () { - local clients=${1//,/ } - local client= - local rc=0 + local clients=${1//,/ } + local client= + local rc=0 - for client in $clients; do - check_client_load $client - rc=${PIPESTATUS[0]} - if [ "$rc" != 0 ]; then - log "Client load failed on node $client, rc=$rc" - return $rc - fi - done + for client in $clients; do + check_client_load $client + rc=${PIPESTATUS[0]} + if [ "$rc" != 0 ]; then + log "Client load failed on node $client, rc=$rc" + return $rc + fi + done } restart_client_loads () { - local clients=${1//,/ } - local expectedfail=${2:-""} - local client= - local rc=0 - - for client in $clients; do - check_client_load $client - rc=${PIPESTATUS[0]} - if [ "$rc" != 0 -a "$expectedfail" ]; then - local var=$(node_var_name $client)_load - start_client_load $client ${!var} - echo "Restarted client load ${!var}: on $client. Checking ..." - check_client_load $client - rc=${PIPESTATUS[0]} - if [ "$rc" != 0 ]; then - log "Client load failed to restart on node $client, rc=$rc" - # failure one client load means test fail - # we do not need to check other - return $rc - fi - else - return $rc - fi - done + local clients=${1//,/ } + local expectedfail=${2:-""} + local client= + local rc=0 + + for client in $clients; do + check_client_load $client + rc=${PIPESTATUS[0]} + if [ "$rc" != 0 -a "$expectedfail" ]; then + local var=$(node_var_name $client)_load + + start_client_load $client ${!var} + echo "Restarted client load ${!var}: on $client. Checking ..." + check_client_load $client + rc=${PIPESTATUS[0]} + if [ "$rc" != 0 ]; then + log "Client load failed to restart on node $client, rc=$rc" + # failure one client load means test fail + # we do not need to check other + return $rc + fi + else + return $rc + fi + done } # Start vmstat and save its process ID in a file. start_vmstat() { - local nodes=$1 - local pid_file=$2 + local nodes=$1 + local pid_file=$2 - [ -z "$nodes" -o -z "$pid_file" ] && return 0 + [ -z "$nodes" -o -z "$pid_file" ] && return 0 - do_nodes $nodes \ + do_nodes $nodes \ "vmstat 1 > $TESTLOG_PREFIX.$TESTNAME.vmstat.\\\$(hostname -s).log \ 2>/dev/null $pid_file" } # Display the nodes on which client loads failed. print_end_run_file() { - local file=$1 - local node + local file=$1 + local node - [ -s $file ] || return 0 + [ -s $file ] || return 0 - echo "Found the END_RUN_FILE file: $file" - cat $file + echo "Found the END_RUN_FILE file: $file" + cat $file - # A client load will stop if it finds the END_RUN_FILE file. - # That does not mean the client load actually failed though. - # The first node in END_RUN_FILE is the one we are interested in. - read node < $file + # A client load will stop if it finds the END_RUN_FILE file. + # That does not mean the client load actually failed though. + # The first node in END_RUN_FILE is the one we are interested in. + read node < $file - if [ -n "$node" ]; then - local var=$(node_var_name $node)_load + if [ -n "$node" ]; then + local var=$(node_var_name $node)_load - local prefix=$TESTLOG_PREFIX - [ -n "$TESTNAME" ] && prefix=$prefix.$TESTNAME - local stdout_log=$prefix.run_${!var}_stdout.$node.log - local debug_log=$(echo $stdout_log | sed 's/\(.*\)stdout/\1debug/') + local prefix=$TESTLOG_PREFIX + [ -n "$TESTNAME" ] && prefix=$prefix.$TESTNAME + local stdout_log=$prefix.run_${!var}_stdout.$node.log + local debug_log=$(echo $stdout_log | + sed 's/\(.*\)stdout/\1debug/') - echo "Client load ${!var} failed on node $node:" - echo "$stdout_log" - echo "$debug_log" - fi + echo "Client load ${!var} failed on node $node:" + echo "$stdout_log" + echo "$debug_log" + fi } # Stop the process which had its PID saved in a file. stop_process() { - local nodes=$1 - local pid_file=$2 + local nodes=$1 + local pid_file=$2 - [ -z "$nodes" -o -z "$pid_file" ] && return 0 + [ -z "$nodes" -o -z "$pid_file" ] && return 0 - do_nodes $nodes "test -f $pid_file && - { kill -s TERM \\\$(cat $pid_file); rm -f $pid_file; }" || true + do_nodes $nodes "test -f $pid_file && + { kill -s TERM \\\$(cat $pid_file); rm -f $pid_file; }" || true } # Stop all client loads. stop_client_loads() { - local nodes=${1:-$CLIENTS} - local pid_file=$2 + local nodes=${1:-$CLIENTS} + local pid_file=$2 - # stop the client loads - stop_process $nodes $pid_file + # stop the client loads + stop_process $nodes $pid_file - # clean up the processes that started them - [ -n "$CLIENT_LOAD_PIDS" ] && kill -9 $CLIENT_LOAD_PIDS 2>/dev/null || true + # clean up the processes that started them + [ -n "$CLIENT_LOAD_PIDS" ] && + kill -9 $CLIENT_LOAD_PIDS 2>/dev/null || true } # End recovery-scale functions -# verify that lustre actually cleaned up properly -cleanup_check() { - VAR=$(lctl get_param -n catastrophe 2>&1) - if [ $? = 0 ] ; then - if [ $VAR != 0 ]; then - error "LBUG/LASSERT detected" - fi - fi - BUSY=$(dmesg | grep -i destruct || true) - if [ -n "$BUSY" ]; then - echo "$BUSY" 1>&2 - [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.$(date +%s) - exit 205 - fi - - check_mem_leak || exit 204 - - [[ $($LCTL dl 2>/dev/null | wc -l) -gt 0 ]] && $LCTL dl && - echo "$TESTSUITE: lustre didn't clean up..." 1>&2 && - return 202 || true - - if module_loaded lnet || module_loaded libcfs; then - echo "$TESTSUITE: modules still loaded..." 1>&2 - /sbin/lsmod 1>&2 - return 203 - fi - return 0 -} - ## # wait for a command to return the expected result # @@ -3032,15 +3407,21 @@ cleanup_check() { # If --verbose is passed as the first argument, the result is printed on each # value change, otherwise it is only printed after every 10s interval. # +# If --quiet is passed as the first/second argument, the do_node() command +# will not print the remote command before executing it each time. +# # Using wait_update_cond() or related helper function is preferable to adding # a "long enough" wait for some state to change in the background, since # "long enough" may be too short due to tunables, system config, or running in # a VM, and must by necessity wait too long for most cases or risk failure. # -# usage: wait_update_cond [--verbose] node check cond expect [max_wait] +# usage: wait_update_cond [--verbose] [--quiet] node check cond expect [max_wait] wait_update_cond() { - local verbose=false - [[ "$1" == "--verbose" ]] && verbose=true && shift + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node=$1 local check="$2" @@ -3055,45 +3436,58 @@ wait_update_cond() { local print=10 while (( $waited <= $max_wait )); do - result=$(do_node $node "$check") + result=$(do_node $quiet $node "$check") eval [[ "'$result'" $cond "'$expect'" ]] if [[ $? == 0 ]]; then + [[ -n "$quiet" ]] && return 0 [[ -z "$result" || $waited -le $sleep ]] || echo "Updated after ${waited}s: want '$expect' got '$result'" return 0 fi - if $verbose && [[ "$result" != "$prev_result" ]]; then - [[ -n "$prev_result" ]] && + if [[ -n "$verbose" && "$result" != "$prev_result" ]]; then + [[ -z "$quiet" && -n "$prev_result" ]] && echo "Changed after ${waited}s: from '$prev_result' to '$result'" prev_result="$result" fi - (( $waited % $print == 0 )) && + (( $waited % $print == 0 )) && { + [[ -z "$quiet" ]] && echo "Waiting $((max_wait - waited))s for '$expect'" + } + sleep $sleep waited=$((SECONDS - begin)) done + + [[ -z "$quiet" ]] && echo "Update not seen after ${max_wait}s: want '$expect' got '$result'" + return 3 } -# usage: wait_update [--verbose] node check expect [max_wait] +# usage: wait_update [--verbose] [--quiet] node check expect [max_wait] wait_update() { - local verbose= - [ "$1" = "--verbose" ] && verbose="$1" && shift + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node="$1" local check="$2" local expect="$3" local max_wait=$4 - wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait + wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait } # usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait] wait_update_facet_cond() { - local verbose= - [ "$1" = "--verbose" ] && verbose="$1" && shift + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node=$(facet_active_host $1) local check="$2" @@ -3101,20 +3495,23 @@ wait_update_facet_cond() { local expect="$4" local max_wait=$5 - wait_update_cond $verbose $node "$check" "$cond" "$expect" $max_wait + wait_update_cond $verbose $quiet $node "$check" "$cond" "$expect" $max_wait } # usage: wait_update_facet [--verbose] facet check expect [max_wait] wait_update_facet() { - local verbose= - [ "$1" = "--verbose" ] && verbose="$1" && shift + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node=$(facet_active_host $1) local check="$2" local expect="$3" local max_wait=$4 - wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait + wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait } sync_all_data() { @@ -3336,50 +3733,54 @@ wait_for_host() { } wait_for_facet() { - local facetlist=$1 - local hostlist + local facetlist=$1 + local hostlist - for facet in ${facetlist//,/ }; do - hostlist=$(expand_list $hostlist $(facet_active_host $facet)) - done - wait_for_host $hostlist + for facet in ${facetlist//,/ }; do + hostlist=$(expand_list $hostlist $(facet_active_host $facet)) + done + wait_for_host $hostlist } _wait_recovery_complete () { - local param=$1 + local param=$1 - # Use default policy if $2 is not passed by caller. - local MAX=${2:-$(max_recovery_time)} + # Use default policy if $2 is not passed by caller. + local MAX=${2:-$(max_recovery_time)} - local WAIT=0 - local STATUS= + local WAIT=0 + local STATUS= - while [ $WAIT -lt $MAX ]; do - STATUS=$(lctl get_param -n $param | grep status) - echo $param $STATUS - [[ $STATUS = "status: COMPLETE" || $STATUS = "status: INACTIVE" ]] && return 0 - sleep 5 - WAIT=$((WAIT + 5)) - echo "Waiting $((MAX - WAIT)) secs for $param recovery done. $STATUS" - done - echo "$param recovery not done in $MAX sec. $STATUS" - return 1 + while [ $WAIT -lt $MAX ]; do + STATUS=$(lctl get_param -n $param | grep status) + echo $param $STATUS + [[ $STATUS == "status: COMPLETE" || + $STATUS == "status: INACTIVE" ]] && return 0 + sleep 5 + WAIT=$((WAIT + 5)) + echo "Waiting $((MAX - WAIT)) secs for $param recovery done. $STATUS" + done + echo "$param recovery not done in $MAX sec. $STATUS" + return 1 } wait_recovery_complete () { - local facet=$1 + local facet=$1 - # with an assumption that at_max is the same on all nodes - local MAX=${2:-$(max_recovery_time)} + # with an assumption that at_max is the same on all nodes + local MAX=${2:-$(max_recovery_time)} - local facets=$facet - if [ "$FAILURE_MODE" = HARD ]; then - facets=$(facets_on_host $(facet_active_host $facet)) - fi - echo affected facets: $facets + local facets=$facet + if [ "$FAILURE_MODE" = HARD ]; then + facets=$(facets_on_host $(facet_active_host $facet)) + fi + echo affected facets: $facets - # we can use "for" here because we are waiting the slowest - for facet in ${facets//,/ }; do + facets=${facets//,/ } + # We can use "for" here because we are waiting the slowest. + # The mgs not having the recovery_status proc entry, exclude it + # from the facet list. + for facet in ${facets//mgs/ }; do local var_svc=${facet}_svc local param="*.${!var_svc}.recovery_status" @@ -3450,14 +3851,15 @@ wait_osts_up() { } wait_destroy_complete () { - echo "Waiting for local destroys to complete" + echo "Waiting for MDT destroys to complete" # MAX value shouldn't be big as this mean server responsiveness # never increase this just to make test pass but investigate # why it takes so long time - local MAX=5 + local MAX=${1:-5} local WAIT=0 + local list=$(comma_list $(mdts_nodes)) while [ $WAIT -lt $MAX ]; do - local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight)) + local -a RPCs=($(do_nodes $list $LCTL get_param -n osp.*.destroys_in_flight)) local con=1 local i @@ -3467,12 +3869,12 @@ wait_destroy_complete () { con=0 break; done - sleep 1 [ ${con} -eq 1 ] && return 0 # done waiting + sleep 1 echo "Waiting ${WAIT}s for local destroys to complete" WAIT=$((WAIT + 1)) done - echo "Local destroys weren't done in $MAX sec." + echo "MDT destroys weren't done in $MAX sec." return 1 } @@ -3482,67 +3884,81 @@ wait_delete_completed() { } wait_exit_ST () { - local facet=$1 - - local WAIT=0 - local INTERVAL=1 - local running - # conf-sanity 31 takes a long time cleanup - while [ $WAIT -lt 300 ]; do - running=$(do_facet ${facet} "lsmod | grep lnet > /dev/null && + local facet=$1 + + local WAIT=0 + local INTERVAL=1 + local running + # conf-sanity 31 takes a long time cleanup + while [ $WAIT -lt 300 ]; do + running=$(do_facet ${facet} "lsmod | grep lnet > /dev/null && lctl dl | grep ' ST ' || true") - [ -z "${running}" ] && return 0 - echo "waited $WAIT for${running}" - [ $INTERVAL -lt 64 ] && INTERVAL=$((INTERVAL + INTERVAL)) - sleep $INTERVAL - WAIT=$((WAIT + INTERVAL)) - done - echo "service didn't stop after $WAIT seconds. Still running:" - echo ${running} - return 1 + [ -z "${running}" ] && return 0 + echo "waited $WAIT for${running}" + [ $INTERVAL -lt 64 ] && INTERVAL=$((INTERVAL + INTERVAL)) + sleep $INTERVAL + WAIT=$((WAIT + INTERVAL)) + done + echo "service didn't stop after $WAIT seconds. Still running:" + echo ${running} + return 1 } wait_remote_prog () { - local prog=$1 - local WAIT=0 - local INTERVAL=5 - local rc=0 - - [ "$PDSH" = "no_dsh" ] && return 0 - - while [ $WAIT -lt $2 ]; do - running=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep) || true - [ -z "${running}" ] && return 0 || true - echo "waited $WAIT for: " - echo "$running" - [ $INTERVAL -lt 60 ] && INTERVAL=$((INTERVAL + INTERVAL)) - sleep $INTERVAL - WAIT=$((WAIT + INTERVAL)) - done - local pids=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep | awk '{print $2}') - [ -z "$pids" ] && return 0 - echo "$PDSH processes still exists after $WAIT seconds. Still running: $pids" - # FIXME: not portable - for pid in $pids; do - cat /proc/${pid}/status || true - cat /proc/${pid}/wchan || true - echo "Killing $pid" - kill -9 $pid || true - sleep 1 - ps -P $pid && rc=1 - done - - return $rc + local prog=$1 + local WAIT=0 + local INTERVAL=5 + local rc=0 + + [ "$PDSH" = "no_dsh" ] && return 0 + + while [ $WAIT -lt $2 ]; do + running=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | + grep -v grep) || true + [ -z "${running}" ] && return 0 || true + echo "waited $WAIT for: " + echo "$running" + [ $INTERVAL -lt 60 ] && INTERVAL=$((INTERVAL + INTERVAL)) + sleep $INTERVAL + WAIT=$((WAIT + INTERVAL)) + done + local pids=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | + grep -v grep | awk '{print $2}') + [ -z "$pids" ] && return 0 + echo "$PDSH processes still exists after $WAIT seconds. Still running: $pids" + # FIXME: not portable + for pid in $pids; do + cat /proc/${pid}/status || true + cat /proc/${pid}/wchan || true + echo "Killing $pid" + kill -9 $pid || true + sleep 1 + ps -P $pid && rc=1 + done + + return $rc } -lfs_df_check() { +_lfs_df_check() { local clients=${1:-$CLIENTS} + local rc=0 - if [ -z "$clients" ]; then - $LFS df $MOUNT + if [[ -z "$clients" ]]; then + $LFS df $MOUNT > /dev/null || rc=$? else - $PDSH $clients "$LFS df $MOUNT" > /dev/null + $PDSH $clients "$LFS df $MOUNT" > /dev/null || rc=$? fi + + return $rc +} + +lfs_df_check() { + local clients=${1:-$CLIENTS} + local rc=0 + + _lfs_df_check "$clients" || rc=$? + + check_lfs_df_ret_val $rc } clients_up() { @@ -3551,6 +3967,21 @@ clients_up() { lfs_df_check } +all_mds_up() { + (( MDSCOUNT == 1 )) && return + + # wait so that statfs data on MDT expire + local delay=$(do_facet mds1 $LCTL \ + get_param -n osp.*MDT*MDT0000.maxage | sort -n | tail -1) + + [ -n "$delay" ] || error "fail to get maxage" + sleep $delay + local nodes=$(comma_list $(mdts_nodes)) + # initiate statfs RPC, all to all MDTs + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null +} + client_up() { # usually checked on particular client or locally sleep 1 @@ -3558,7 +3989,8 @@ client_up() { } client_evicted() { - ! client_up $1 + sleep 1 + ! _lfs_df_check $1 } client_reconnect_try() { @@ -3584,15 +4016,15 @@ client_reconnect() { } affected_facets () { - local facet=$1 + local facet=$1 - local host=$(facet_active_host $facet) - local affected=$facet + local host=$(facet_active_host $facet) + local affected=$facet - if [ "$FAILURE_MODE" = HARD ]; then - affected=$(facets_up_on_host $host) - fi - echo $affected + if [ "$FAILURE_MODE" = HARD ]; then + affected=$(facets_up_on_host $host) + fi + echo $affected } facet_failover() { @@ -3620,7 +4052,7 @@ facet_failover() { skip=0 #check whether facet has been included in other affected facets for ((index=0; index<$total; index++)); do - [[ *,$facet,* == ,${affecteds[index]}, ]] && skip=1 + [[ ,${affecteds[index]}, == *,$facet,* ]] && skip=1 done if [ $skip -eq 0 ]; then @@ -3636,18 +4068,52 @@ facet_failover() { shutdown_facet $facet done - $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \ - $(mdsdevname 1) "-n" || error "Running e2fsck") + echo "$(date +'%H:%M:%S (%s)') shut down" - for ((index=0; index<$total; index++)); do - facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1) - echo reboot facets: ${affecteds[index]} + local hostlist + local waithostlist + + for facet in ${facets//,/ }; do + local host=$(facet_active_host $facet) + + hostlist=$(expand_list $hostlist $host) + if [ $(facet_host $facet) = \ + $(facet_failover_host $facet) ]; then + waithostlist=$(expand_list $waithostlist $host) + fi + done + + if [ "$FAILURE_MODE" = HARD ]; then + for host in ${hostlist//,/ }; do + reboot_node $host + done + echo "$(date +'%H:%M:%S (%s)') $hostlist rebooted" + # We need to wait the rebooted hosts in case if + # facet_HOST == facetfailover_HOST + if ! [ -z "$waithostlist" ]; then + wait_for_host $waithostlist + if $LOAD_MODULES_REMOTE; then + echo "loading modules on $waithostlist" + do_rpc_nodes $waithostlist load_modules_local + fi + fi + else + sleep 10 + fi - reboot_facet $facet + if [[ " ${affecteds[@]} " =~ " $SINGLEMDS " ]]; then + change_active $SINGLEMDS + fi + + $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \ + $(facet_device $SINGLEMDS) "-n" || error "Running e2fsck") - change_active ${affecteds[index]} + local -a mountpids - wait_for_facet ${affecteds[index]} + for ((index=0; index<$total; index++)); do + if [[ ${affecteds[index]} != $SINGLEMDS ]]; then + change_active ${affecteds[index]} + fi if $GSS_SK; then init_gss init_facets_vars_simple @@ -3656,11 +4122,20 @@ facet_failover() { if ! combined_mgs_mds && list_member ${affecteds[index]} mgs; then mount_facet mgs || error "Restart of mgs failed" + affecteds[index]=$(exclude_items_from_list \ + ${affecteds[index]} mgs) + fi + if [ -n "${affecteds[index]}" ]; then + echo mount facets: ${affecteds[index]} + mount_facets ${affecteds[index]} & + mountpids[index]=$! + fi + done + for ((index=0; index<$total; index++)); do + if [ -n "${affecteds[index]}" ]; then + wait ${mountpids[index]} fi - # FIXME; has to be changed to mount all facets concurrently - affected=$(exclude_items_from_list ${affecteds[index]} mgs) - echo mount facets: ${affecteds[index]} - mount_facets ${affecteds[index]} + if $GSS_SK; then do_nodes $(comma_list $(all_nodes)) \ "keyctl show | grep lustre | cut -c1-11 | @@ -3668,10 +4143,20 @@ facet_failover() { xargs -IX keyctl setperm X 0x3f3f3f3f" fi done -} + echo "$(date +'%H:%M:%S (%s)') targets are mounted" + + if [ "$FAILURE_MODE" = HARD ]; then + hostlist=$(exclude_items_from_list $hostlist $waithostlist) + if ! [ -z "$hostlist" ]; then + wait_for_host $hostlist + if $LOAD_MODULES_REMOTE; then + echo "loading modules on $hostlist" + do_rpc_nodes $hostlist load_modules_local + fi + fi + fi -obd_name() { - local facet=$1 + echo "$(date +'%H:%M:%S (%s)') facet_failover done" } replay_barrier() { @@ -3768,7 +4253,7 @@ fail() { export SK_NO_KEY=$SK_NO_KEY_save # to initiate all OSC idling connections clients_up - wait_clients_import_state "$clients" "$facets" "\(FULL\|IDLE\)" + wait_clients_import_ready "$clients" "$facets" clients_up || error "post-failover stat: $?" } @@ -3788,6 +4273,16 @@ fail_abort() { mount_facet $facet -o $abort_type clients_up || echo "first stat failed: $?" clients_up || error "post-failover stat: $?" + all_mds_up +} + +# LU-16159: abort recovery will cancel update logs, which may leave broken +# directories in the system, remove name entry if necessary +fail_abort_cleanup() { + rm -rf $DIR/$tdir/* + find $DIR/$tdir -depth | while read D; do + rmdir "$D" || $LFS rm_entry "$D" || error "rm $D failed" + done } host_nids_address() { @@ -3812,123 +4307,106 @@ h2nettype() { } declare -fx h2nettype -# Wrapper function to print the deprecation warning -h2tcp() { - echo "h2tcp: deprecated, use h2nettype instead" 1>&2 - if [[ -n "$NETTYPE" ]]; then - h2nettype "$@" - else - h2nettype "$1" "tcp" - fi -} - -# Wrapper function to print the deprecation warning -h2o2ib() { - echo "h2o2ib: deprecated, use h2nettype instead" 1>&2 - if [[ -n "$NETTYPE" ]]; then - h2nettype "$@" - else - h2nettype "$1" "o2ib" - fi -} - # This enables variables in cfg/"setup".sh files to support the pdsh HOSTLIST # expressions format. As a bonus we can then just pass in those variables # to pdsh. What this function does is take a HOSTLIST type string and # expand it into a space deliminated list for us. hostlist_expand() { - local hostlist=$1 - local offset=$2 - local myList - local item - local list - - [ -z "$hostlist" ] && return - - # Translate the case of [..],..,[..] to [..] .. [..] - list="${hostlist/],/] }" - front=${list%%[*} - [[ "$front" == *,* ]] && { - new="${list%,*} " - old="${list%,*}," - list=${list/${old}/${new}} - } - - for item in $list; do - # Test if we have any []'s at all - if [ "$item" != "${item/\[/}" ]; then { - # Expand the [*] into list - name=${item%%[*} - back=${item#*]} - - if [ "$name" != "$item" ]; then - group=${item#$name[*} - group=${group%%]*} - - for range in ${group//,/ }; do - local order - - begin=${range%-*} - end=${range#*-} - - # Number of leading zeros - padlen=${#begin} - padlen2=${#end} - end=$(echo $end | sed 's/0*//') - [[ -z "$end" ]] && end=0 - [[ $padlen2 -gt $padlen ]] && { - [[ $padlen2 -eq ${#end} ]] && padlen2=0 - padlen=$padlen2 - } - begin=$(echo $begin | sed 's/0*//') - [ -z $begin ] && begin=0 - - if [ ! -z "${begin##[!0-9]*}" ]; then - order=$(seq -f "%0${padlen}g" $begin $end) - else - order=$(eval echo {$begin..$end}); - fi - - for num in $order; do - value="${name#*,}${num}${back}" - [ "$value" != "${value/\[/}" ] && { - value=$(hostlist_expand "$value") - } - myList="$myList $value" - done - done - fi - } else { - myList="$myList $item" - } fi - done - myList=${myList//,/ } - myList=${myList:1} # Remove first character which is a space - - # Filter any duplicates without sorting - list="$myList " - myList="${list%% *}" - - while [[ "$list" != ${myList##* } ]]; do - local tlist=" $list" - list=${tlist// ${list%% *} / } - list=${list:1} - myList="$myList ${list%% *}" - done - myList="${myList%* }"; - - # We can select an object at an offset in the list - [ $# -eq 2 ] && { - cnt=0 - for item in $myList; do - let cnt=cnt+1 - [ $cnt -eq $offset ] && { - myList=$item - } - done - [ $(get_node_count $myList) -ne 1 ] && myList="" - } - echo $myList + local hostlist=$1 + local offset=$2 + local myList + local item + local list + + [ -z "$hostlist" ] && return + + # Translate the case of [..],..,[..] to [..] .. [..] + list="${hostlist/],/] }" + front=${list%%[*} + [[ "$front" == *,* ]] && { + new="${list%,*} " + old="${list%,*}," + list=${list/${old}/${new}} + } + + for item in $list; do + # Test if we have any []'s at all + if [ "$item" != "${item/\[/}" ]; then { + # Expand the [*] into list + name=${item%%[*} + back=${item#*]} + + if [ "$name" != "$item" ]; then + group=${item#$name[*} + group=${group%%]*} + + for range in ${group//,/ }; do + local order + + begin=${range%-*} + end=${range#*-} + + # Number of leading zeros + padlen=${#begin} + padlen2=${#end} + end=$(echo $end | sed 's/0*//') + [[ -z "$end" ]] && end=0 + [[ $padlen2 -gt $padlen ]] && { + [[ $padlen2 -eq ${#end} ]] && + padlen2=0 + padlen=$padlen2 + } + begin=$(echo $begin | sed 's/0*//') + [ -z $begin ] && begin=0 + + if [ ! -z "${begin##[!0-9]*}" ]; then + order=$(seq -f "%0${padlen}g" $begin $end) + else + order=$(eval echo {$begin..$end}); + fi + + for num in $order; do + value="${name#*,}${num}${back}" + + [ "$value" != "${value/\[/}" ] && { + value=$(hostlist_expand "$value") + } + myList="$myList $value" + done + done + fi + } else { + myList="$myList $item" + } fi + done + myList=${myList//,/ } + myList=${myList:1} # Remove first character which is a space + + # Filter any duplicates without sorting + list="$myList " + myList="${list%% *}" + + while [[ "$list" != ${myList##* } ]]; do + local tlist=" $list" + + list=${tlist// ${list%% *} / } + list=${list:1} + myList="$myList ${list%% *}" + done + myList="${myList%* }"; + + # We can select an object at an offset in the list + [ $# -eq 2 ] && { + cnt=0 + for item in $myList; do + let cnt=cnt+1 + [ $cnt -eq $offset ] && { + myList=$item + } + done + [ $(get_node_count $myList) -ne 1 ] && myList="" + } + echo $myList } facet_host() { @@ -3947,7 +4425,14 @@ facet_host() { elif [ "${facet:0:3}" == "mdt" -o \ "${facet:0:3}" == "mds" -o \ "${facet:0:3}" == "mgs" ]; then - eval export ${facet}_HOST=${mds_HOST} + local temp + if [ "${facet}" == "mgsfailover" ] && + [ -n "$mds1failover_HOST" ]; then + temp=$mds1failover_HOST + else + temp=${mds_HOST} + fi + eval export ${facet}_HOST=$temp fi fi echo -n ${!varname} @@ -3963,6 +4448,12 @@ facet_failover_host() { return fi + if combined_mgs_mds && [ $facet == "mgs" ] && + [ -n "$mds1failover_HOST" ]; then + echo $mds1failover_HOST + return + fi + if [ "${facet:0:3}" == "mdt" -o "${facet:0:3}" == "mds" -o \ "${facet:0:3}" == "mgs" ]; then @@ -3979,19 +4470,19 @@ facet_failover_host() { } facet_active() { - local facet=$1 - local activevar=${facet}active + local facet=$1 + local activevar=${facet}active - if [ -f $TMP/${facet}active ] ; then - source $TMP/${facet}active - fi + if [ -f $TMP/${facet}active ] ; then + source $TMP/${facet}active + fi - active=${!activevar} - if [ -z "$active" ] ; then - echo -n ${facet} - else - echo -n ${active} - fi + active=${!activevar} + if [ -z "$active" ] ; then + echo -n ${facet} + else + echo -n ${active} + fi } facet_active_host() { @@ -4017,53 +4508,55 @@ facet_passive_host() { } change_active() { - local facetlist=$1 - local facet + local facetlist=$1 + local facet - facetlist=$(exclude_items_from_list $facetlist mgs) + for facet in ${facetlist//,/ }; do + local failover=${facet}failover + local host=`facet_host $failover` - for facet in ${facetlist//,/ }; do - local failover=${facet}failover - local host=`facet_host $failover` - [ -z "$host" ] && return + [ -z "$host" ] && return - local curactive=`facet_active $facet` - if [ -z "${curactive}" -o "$curactive" == "$failover" ] ; then - eval export ${facet}active=$facet - else - eval export ${facet}active=$failover - fi - # save the active host for this facet - local activevar=${facet}active - echo "$activevar=${!activevar}" > $TMP/$activevar - [[ $facet = mds1 ]] && combined_mgs_mds && \ - echo "mgsactive=${!activevar}" > $TMP/mgsactive - local TO=`facet_active_host $facet` - echo "Failover $facet to $TO" - done + local curactive=`facet_active $facet` + + if [ -z "${curactive}" -o "$curactive" == "$failover" ] ; then + eval export ${facet}active=$facet + else + eval export ${facet}active=$failover + fi + # save the active host for this facet + local activevar=${facet}active + + echo "$activevar=${!activevar}" > $TMP/$activevar + [[ $facet = mds1 ]] && combined_mgs_mds && \ + echo "mgsactive=${!activevar}" > $TMP/mgsactive + local TO=`facet_active_host $facet` + echo "Failover $facet to $TO" + done } do_node() { - local verbose=false - # do not stripe off hostname if verbose, bug 19215 - if [ x$1 = x--verbose ]; then - shift - verbose=true - fi + local verbose + local quiet - local HOST=$1 - shift - local myPDSH=$PDSH - if [ "$HOST" = "$HOSTNAME" ]; then - myPDSH="no_dsh" - elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then - echo "cannot run remote command on $HOST with $myPDSH" - return 128 - fi - if $VERBOSE; then - echo "CMD: $HOST $@" >&2 - $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || : - fi + # do not strip off hostname if verbose, b=19215 + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local HOST=$1 + shift + local myPDSH=$PDSH + + if [ "$HOST" = "$HOSTNAME" ]; then + myPDSH="no_dsh" + elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then + echo "cannot run remote command on $HOST with $myPDSH" + return 128 + fi + if $VERBOSE && [[ -z "$quiet" ]]; then + echo "CMD: $HOST $*" >&2 + $myPDSH $HOST "$LCTL mark \"$*\"" > /dev/null 2>&1 || : + fi if [[ "$myPDSH" == "rsh" ]] || [[ "$myPDSH" == *pdsh* && "$myPDSH" != *-S* ]]; then @@ -4074,31 +4567,65 @@ do_node() { eval $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests; PATH=\$PATH:/sbin:/usr/sbin; cd $RPWD; - LUSTRE=\"$RLUSTRE\" sh -c \"$@\") || + LUSTRE=\"$RLUSTRE\" bash -c \"$*\") || echo command failed >$command_status" [[ -n "$($myPDSH $HOST cat $command_status)" ]] && return 1 || return 0 fi - if $verbose ; then - # print HOSTNAME for myPDSH="no_dsh" - if [[ $myPDSH = no_dsh ]]; then - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed -e "s/^/${HOSTNAME}: /" - else - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" - fi - else - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed "s/^${HOST}: //" - fi - return ${PIPESTATUS[0]} -} - -do_nodev() { - do_node --verbose "$@" + if [[ -n "$verbose" ]]; then + # print HOSTNAME for myPDSH="no_dsh" + if [[ $myPDSH = no_dsh ]]; then + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$*\")" | + sed -e "s/^/${HOSTNAME}: /" + else + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$*\")" + fi + else + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$*\")" | + sed "s/^${HOST}: //" + fi + return ${PIPESTATUS[0]} +} + +## +# Execute exact command line on host +# +# The \a host may be on a local or remote node, which is determined at +# the time the command is run. Does careful argument quotation to +# ensure that the exact command line is executed without any globbing, +# substitution, or shell interpretation on the remote side. Does not +# support --verbose or --quiet. Does not include "$host: " prefixes on +# output. See also do_facet_vp(). +# +# usage: do_node_vp "$host" "$command" "$arg"... +do_node_vp() { + local host="$1" + shift + + if [[ "$host" == "$HOSTNAME" ]]; then + bash -c "$(printf -- ' %q' "$@")" + return $? + fi + + if [[ "${PDSH}" != *pdsh* || "${PDSH}" != *-S* ]]; then + echo "cannot run '$*' on host '${host}' with PDSH='${PDSH}'" >&2 + return 128 + fi + + # -N Disable hostname: prefix on lines of output. + + $PDSH "${host}" -N "cd $RPWD; PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; export LUSTRE=$RLUSTRE; $(printf -- ' %q' "$@")" } single_local_node () { - [ "$1" = "$HOSTNAME" ] + [ "$1" = "$HOSTNAME" ] } # Outputs environment variable assignments that should be passed to remote nodes @@ -4144,45 +4671,42 @@ get_env_vars() { } do_nodes() { - local verbose=false - # do not stripe off hostname if verbose, bug 19215 - if [ x$1 = x--verbose ]; then - shift - verbose=true - fi + local verbose + local quiet - local rnodes=$1 - shift + # do not strip off hostname if verbose, b=19215 + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift - if single_local_node $rnodes; then - if $verbose; then - do_nodev $rnodes "$@" - else - do_node $rnodes "$@" - fi - return $? - fi + local rnodes=$1 + shift + + if single_local_node $rnodes; then + do_node $verbose $quiet $rnodes "$@" + return $? + fi - # This is part from do_node - local myPDSH=$PDSH + # This is part from do_node + local myPDSH=$PDSH - [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && \ - echo "cannot run remote command on $rnodes with $myPDSH" && return 128 + [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && + echo "cannot run remote command on $rnodes with $myPDSH" && + return 128 - export FANOUT=$(get_node_count "${rnodes//,/ }") - if $VERBOSE; then - echo "CMD: $rnodes $@" >&2 - $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || : - fi + export FANOUT=$(get_node_count "${rnodes//,/ }") + if $VERBOSE && [[ -z "$quiet" ]]; then + echo "CMD: $rnodes $*" >&2 + $myPDSH $rnodes "$LCTL mark \"$*\"" > /dev/null 2>&1 || : + fi - # do not replace anything from pdsh output if -N is used - # -N Disable hostname: prefix on lines of output. - if $verbose || [[ $myPDSH = *-N* ]]; then - $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" - else - $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" | sed -re "s/^[^:]*: //g" - fi - return ${PIPESTATUS[0]} + # do not replace anything from pdsh output if -N is used + # -N Disable hostname: prefix on lines of output. + if [[ -n "$verbose" || $myPDSH = *-N* ]]; then + $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$*\")" + else + $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$*\")" | sed -re "s/^[^:]*: //g" + fi + return ${PIPESTATUS[0]} } ## @@ -4193,11 +4717,42 @@ do_nodes() { # # usage: do_facet $facet command [arg ...] do_facet() { + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + local facet=$1 shift - local HOST=$(facet_active_host $facet) - [ -z $HOST ] && echo "No host defined for facet ${facet}" && exit 1 - do_node $HOST "$@" + local host=$(facet_active_host $facet) + + [ -z "$host" ] && echo "No host defined for facet ${facet}" && exit 1 + do_node $verbose $quiet $host "$@" +} + +## +# Execute exact command line on the host of a facet +# +# The \a facet (service) may be on a local or remote node, which is +# determined at the time the command is run. Does careful argument +# quotation to ensure that the exact command line is executed without +# any globbing, substitution, or shell interpretation on the remote +# side. Does not support --verbose or --quiet. Does not include +# "$host: " prefixes on output. +# +# usage: do_facet_vp "$facet" "$command" "$arg"... +do_facet_vp() { + local facet="$1" + local host=$(facet_active_host "$facet") + shift + + if [[ -z "$host" ]]; then + echo "no host defined for facet ${facet}" >&2 + exit 1 + fi + + do_node_vp "$host" "$@" } # Function: do_facet_random_file $FACET $FILE $SIZE @@ -4220,7 +4775,7 @@ do_facet_create_file() { } do_nodesv() { - do_nodes --verbose "$@" + do_nodes --verbose "$@" } add() { @@ -4273,7 +4828,7 @@ ostdevname() { error "unknown fstype!";; esac - echo -n $DEVPTR + echo -n $DEVPTR } # Physical device location of data @@ -4399,13 +4954,13 @@ mgsvdevname() { } facet_mntpt () { - local facet=$1 - [[ $facet = mgs ]] && combined_mgs_mds && facet="mds1" + local facet=$1 + [[ $facet = mgs ]] && combined_mgs_mds && facet="mds1" - local var=${facet}_MOUNT - eval mntpt=${!var:-${MOUNT}-$facet} + local var=${facet}_MOUNT + eval mntpt=${!var:-${MOUNT}-$facet} - echo -n $mntpt + echo -n $mntpt } mount_ldiskfs() { @@ -4515,6 +5070,7 @@ stopall() { # The add fn does rm ${facet}active file, this would be enough # if we use do_facet only after the facet added, but # currently we use do_facet mds in local.sh + local num for num in `seq $MDSCOUNT`; do stop mds$num -f rm -f ${TMP}/mds${num}active @@ -4626,6 +5182,7 @@ mkfs_opts() { local opts local fs_mkfs_opts local var + local varbs=${facet}_BLOCKSIZE if [ $type == MGS ] || ( [ $type == MDS ] && [ "$dev" == $(mgsdevname) ] && @@ -4649,14 +5206,11 @@ mkfs_opts() { opts+=${LDLM_TIMEOUT:+" --param=sys.ldlm_timeout=$LDLM_TIMEOUT"} if [ $type == MDS ]; then - opts+=${MDSCAPA:+" --param-mdt.capa=$MDSCAPA"} opts+=${DEF_STRIPE_SIZE:+" --param=lov.stripesize=$DEF_STRIPE_SIZE"} opts+=${DEF_STRIPE_COUNT:+" --param=lov.stripecount=$DEF_STRIPE_COUNT"} opts+=${L_GETIDENTITY:+" --param=mdt.identity_upcall=$L_GETIDENTITY"} if [ $fstype == ldiskfs ]; then - fs_mkfs_opts+="-O ea_inode,large_dir" - var=${facet}_JRN if [ -n "${!var}" ]; then fs_mkfs_opts+=" -J device=${!var}" @@ -4668,8 +5222,6 @@ mkfs_opts() { fi if [ $type == OST ]; then - opts+=${OSSCAPA:+" --param=ost.capa=$OSSCAPA"} - if [ $fstype == ldiskfs ]; then var=${facet}_JRN if [ -n "${!var}" ]; then @@ -4695,6 +5247,7 @@ mkfs_opts() { [[ "$QUOTA_TYPE" =~ "p" ]] && fs_mkfs_opts+=" -O project" + [ $fstype == ldiskfs ] && fs_mkfs_opts+=" -b ${!varbs:-$BLCKSIZE}" [ $fstype == ldiskfs ] && fs_mkfs_opts=$(squash_opt $fs_mkfs_opts) if [ -n "${fs_mkfs_opts## }" ]; then @@ -4854,35 +5407,30 @@ umount_client() { grep " $1 " /proc/mounts && zconf_umount $HOSTNAME $* } -# return value: -# 0: success, the old identity set already. -# 1: success, the old identity does not set. +# usage: switch_identity MDSNUM ENABLE_UPCALL +# +# return values: +# 0: success, the identity upcall was previously enabled already. +# 1: success, the identity upcall was previously disabled. # 2: fail. switch_identity() { - local num=$1 - local switch=$2 - local j=`expr $num - 1` - local MDT="`(do_facet mds$num lctl get_param -N mdt.*MDT*$j 2>/dev/null | cut -d"." -f2 2>/dev/null) || true`" + local num=$1 + local enable=$2 + local facet=mds$num + local MDT="$(mdtname_from_index $((num - 1)) $MOUNT)" + local upcall="$L_GETIDENTITY" - if [ -z "$MDT" ]; then - return 2 - fi + [[ -n "$MDT" ]] || return 2 - local old="`do_facet mds$num "lctl get_param -n mdt.$MDT.identity_upcall"`" + local param="mdt.$MDT.identity_upcall" + local old="$(do_facet $facet "lctl get_param -n $param")" - if $switch; then - do_facet mds$num "lctl set_param -n mdt.$MDT.identity_upcall \"$L_GETIDENTITY\"" - else - do_facet mds$num "lctl set_param -n mdt.$MDT.identity_upcall \"NONE\"" - fi + [[ "$enable" == "true" ]] || upcall="NONE" - do_facet mds$num "lctl set_param -n mdt/$MDT/identity_flush=-1" + do_facet $facet "lctl set_param -n $param='$upcall'" || return 2 + do_facet $facet "lctl set_param -n mdt.$MDT.identity_flush=-1" - if [ $old = "NONE" ]; then - return 1 - else - return 0 - fi + [[ "$old" != "NONE" ]] # implicit "&& return 0 || return 1" } remount_client() @@ -4942,12 +5490,21 @@ mountmds() { eval $varname=$host fi done - if [ $IDENTITY_UPCALL != "default" ]; then + if [[ "$IDENTITY_UPCALL" != "default" ]]; then switch_identity $num $IDENTITY_UPCALL fi done } +unmountoss() { + local num + + for num in $(seq $OSTCOUNT); do + stop ost$num -f + rm -f $TMP/ost${num}active + done +} + mountoss() { local num local devname @@ -5102,7 +5659,7 @@ init_facet_vars () { shift eval export ${facet}_dev=${device} - eval export ${facet}_opt=\"$@\" + eval export ${facet}_opt=\"$*\" local dev=${facet}_dev @@ -5137,7 +5694,14 @@ init_facet_vars () { local varname=${facet}failover_HOST if [ -z "${!varname}" ]; then - eval export $varname=$(facet_host $facet) + local temp + if combined_mgs_mds && [ $facet == "mgs" ] && + [ -n "$mds1failover_HOST" ]; then + temp=$mds1failover_HOST + else + temp=$(facet_host $facet) + fi + eval export $varname=$temp fi varname=${facet}_HOST @@ -5170,12 +5734,12 @@ init_facets_vars () { if ! remote_mds_nodsh; then for num in $(seq $MDSCOUNT); do - DEVNAME=`mdsdevname $num` + DEVNAME=$(mdsdevname $num) init_facet_vars mds$num $DEVNAME $MDS_MOUNT_OPTS done fi - combined_mgs_mds || init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS + init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS if ! remote_ost_nodsh; then for num in $(seq $OSTCOUNT); do @@ -5211,22 +5775,23 @@ init_facets_vars_simple () { } osc_ensure_active () { - local facet=$1 - local timeout=$2 - local period=0 + local facet=$1 + local timeout=$2 + local period=0 - while [ $period -lt $timeout ]; do - count=$(do_facet $facet "lctl dl | grep ' IN osc ' 2>/dev/null | wc -l") - if [ $count -eq 0 ]; then - break - fi + while [ $period -lt $timeout ]; do + count=$(do_facet $facet "lctl dl | grep ' IN osc ' 2>/dev/null | wc -l") + if [ $count -eq 0 ]; then + break + fi - echo "There are $count OST are inactive, wait $period seconds, and try again" - sleep 3 - period=$((period+3)) - done + echo "$count OST inactive, wait $period seconds, and try again" + sleep 3 + period=$((period+3)) + done - [ $period -lt $timeout ] || log "$count OST are inactive after $timeout seconds, give up" + [ $period -lt $timeout ] || + log "$count OST are inactive after $timeout seconds, give up" } set_conf_param_and_check() { @@ -5304,6 +5869,11 @@ init_param_vars () { TIMEOUT=$(do_facet $SINGLEMDS "lctl get_param -n timeout") log "Using TIMEOUT=$TIMEOUT" + # tune down to speed up testing on (usually) small setups + local mgc_timeout=/sys/module/mgc/parameters/mgc_requeue_timeout_min + do_nodes $(comma_list $(nodes_list)) \ + "[ -f $mgc_timeout ] && echo 1 > $mgc_timeout; exit 0" + osc_ensure_active $SINGLEMDS $TIMEOUT osc_ensure_active client $TIMEOUT $LCTL set_param osc.*.idle_timeout=debug @@ -5334,27 +5904,28 @@ init_param_vars () { fi (( MDS1_VERSION <= $(version_code 2.13.52) )) || - do_nodes $(comma_list $(mdts_nodes)) \ - "$LCTL set_param lod.*.mdt_hash=crush" + do_facet mgs "$LCTL set_param -P lod.*.mdt_hash=crush" return 0 } nfs_client_mode () { - if [ "$NFSCLIENT" ]; then - echo "NFSCLIENT mode: setup, cleanup, check config skipped" - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - # FIXME: remove hostname when 19215 fixed - do_nodes $clients "echo \\\$(hostname); grep ' '$MOUNT' ' /proc/mounts" - declare -a nfsexport=(`grep ' '$MOUNT' ' /proc/mounts | awk '{print $1}' | awk -F: '{print $1 " " $2}'`) - if [[ ${#nfsexport[@]} -eq 0 ]]; then - error_exit NFSCLIENT=$NFSCLIENT mode, but no NFS export found! - fi - do_nodes ${nfsexport[0]} "echo \\\$(hostname); df -T ${nfsexport[1]}" - return - fi - return 1 + if [ "$NFSCLIENT" ]; then + echo "NFSCLIENT mode: setup, cleanup, check config skipped" + local clients=$CLIENTS + + [ -z $clients ] && clients=$(hostname) + + # FIXME: remove hostname when 19215 fixed + do_nodes $clients "echo \\\$(hostname); grep ' '$MOUNT' ' /proc/mounts" + declare -a nfsexport=(`grep ' '$MOUNT' ' /proc/mounts | + awk '{print $1}' | awk -F: '{print $1 " " $2}'`) + if [[ ${#nfsexport[@]} -eq 0 ]]; then + error_exit NFSCLIENT=$NFSCLIENT mode, but no NFS export found! + fi + do_nodes ${nfsexport[0]} "echo \\\$(hostname); df -T ${nfsexport[1]}" + return + fi + return 1 } cifs_client_mode () { @@ -5363,34 +5934,37 @@ cifs_client_mode () { } check_config_client () { - local mntpt=$1 - - local mounted=$(mount | grep " $mntpt ") - if [ -n "$CLIENTONLY" ]; then - # bug 18021 - # CLIENTONLY should not depend on *_HOST settings - local mgc=$($LCTL device_list | awk '/MGC/ {print $4}') - # in theory someone could create a new, - # client-only config file that assumed lustre was already - # configured and didn't set the MGSNID. If MGSNID is not set, - # then we should use the mgs nid currently being used - # as the default value. bug 18021 - [[ x$MGSNID = x ]] && - MGSNID=${mgc//MGC/} - - if [[ x$mgc != xMGC$MGSNID ]]; then - if [ "$mgs_HOST" ]; then - local mgc_ip=$(ping -q -c1 -w1 $mgs_HOST | grep PING | awk '{print $3}' | sed -e "s/(//g" -e "s/)//g") -# [[ x$mgc = xMGC$mgc_ip@$NETTYPE ]] || -# error_exit "MGSNID=$MGSNID, mounted: $mounted, MGC : $mgc" - fi - fi - return 0 - fi + local mntpt=$1 + local mounted=$(mount | grep " $mntpt ") + + if [ -n "$CLIENTONLY" ]; then + # bug 18021 + # CLIENTONLY should not depend on *_HOST settings + local mgc=$($LCTL device_list | awk '/MGC/ {print $4}') + # in theory someone could create a new, + # client-only config file that assumed lustre was already + # configured and didn't set the MGSNID. If MGSNID is not set, + # then we should use the mgs nid currently being used + # as the default value. bug 18021 + [[ x$MGSNID = x ]] && + MGSNID=${mgc//MGC/} + + if [[ x$mgc != xMGC$MGSNID ]]; then + if [ "$mgs_HOST" ]; then + local mgc_ip=$(ping -q -c1 -w1 $mgs_HOST | + grep PING | awk '{print $3}' | + sed -e "s/(//g" -e "s/)//g") + + # [[ x$mgc = xMGC$mgc_ip@$NETTYPE ]] || + # error_exit "MGSNID=$MGSNID, mounted: $mounted, MGC : $mgc" + fi + fi + return 0 + fi - echo Checking config lustre mounted on $mntpt - local mgshost=$(mount | grep " $mntpt " | awk -F@ '{print $1}') - mgshost=$(echo $mgshost | awk -F: '{print $1}') + echo Checking config lustre mounted on $mntpt + local mgshost=$(mount | grep " $mntpt " | awk -F@ '{print $1}') + mgshost=$(echo $mgshost | awk -F: '{print $1}') } @@ -5407,12 +5981,12 @@ check_config_clients () { } check_timeout () { - local mdstimeout=$(do_facet $SINGLEMDS "lctl get_param -n timeout") - local cltimeout=$(lctl get_param -n timeout) - if [ $mdstimeout -ne $TIMEOUT ] || [ $mdstimeout -ne $cltimeout ]; then - error "timeouts are wrong! mds: $mdstimeout, client: $cltimeout, TIMEOUT=$TIMEOUT" - return 1 - fi + local mdstimeout=$(do_facet $SINGLEMDS "lctl get_param -n timeout") + local cltimeout=$(lctl get_param -n timeout) + if [ $mdstimeout -ne $TIMEOUT ] || [ $mdstimeout -ne $cltimeout ]; then + error "timeouts are wrong! mds: $mdstimeout, client: $cltimeout, TIMEOUT=$TIMEOUT" + return 1 + fi } is_mounted () { @@ -5423,28 +5997,111 @@ is_mounted () { echo $mounted' ' | grep -w -q $mntpt' ' } -is_empty_dir() { - [ $(find $1 -maxdepth 1 -print | wc -l) = 1 ] && return 0 - return 1 +create_pools () { + local pool=$1 + local ostsn=${2:-$OSTCOUNT} + local npools=${FS_NPOOLS:-$((OSTCOUNT / ostsn))} + local n + + echo ostsn=$ostsn npools=$npools + if [[ $ostsn -gt $OSTCOUNT ]]; then + echo "request to use $ostsn OSTs in the pool, \ + using max available OSTCOUNT=$OSTCOUNT" + ostsn=$OSTCOUNT + fi + for (( n=0; n < $npools; n++ )); do + p=${pool}$n + if ! $DELETE_OLD_POOLS; then + log "request to not delete old pools: $FSNAME.$p exist?" + if ! check_pool_not_exist $FSNAME.$p; then + echo "Using existing $FSNAME.$p" + $LCTL pool_list $FSNAME.$p + continue + fi + fi + create_pool $FSNAME.$p $KEEP_POOLS || + error "create_pool $FSNAME.$p failed" + + local first=$(( (n * ostsn) % OSTCOUNT )) + local last=$(( (first + ostsn - 1) % OSTCOUNT )) + if [[ $first -le $last ]]; then + pool_add_targets $p $first $last || + error "pool_add_targets $p $first $last failed" + else + pool_add_targets $p $first $(( OSTCOUNT - 1 )) || + error "pool_add_targets $p $first \ + $(( OSTCOUNT - 1 )) failed" + pool_add_targets $p 0 $last || + error "pool_add_targets $p 0 $last failed" + fi + done } -# empty lustre filesystem may have empty directories lost+found and .lustre -is_empty_fs() { - # exclude .lustre & lost+found - [ $(find $1 -maxdepth 1 -name lost+found -o -name .lustre -prune -o \ - -print | wc -l) = 1 ] || return 1 - [ ! -d $1/lost+found ] || is_empty_dir $1/lost+found || return 1 - if [ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.4.0) ]; then - # exclude .lustre/fid (LU-2780) - [ $(find $1/.lustre -maxdepth 1 -name fid -prune -o \ - -print | wc -l) = 1 ] || return 1 - else - [ ! -d $1/.lustre ] || is_empty_dir $1/.lustre || return 1 - fi - return 0 +set_pools_quota () { + local u + local o + local p + local i + local j + + [[ $ENABLE_QUOTA ]] || error "Required Pool Quotas: \ + $POOLS_QUOTA_USERS_SET, but ENABLE_QUOTA not set!" + + # POOLS_QUOTA_USERS_SET= + # "quota15_1:20M -- for all of the found pools + # quota15_2:1G:gpool0 + # quota15_3 -- for global limit only + # quota15_4:200M:gpool0 + # quota15_4:200M:gpool1" + + declare -a pq_userset=(${POOLS_QUOTA_USERS_SET="mpiuser"}) + declare -a pq_users + declare -A pq_limits + + for ((i=0; i<${#pq_userset[@]}; i++)); do + u=${pq_userset[i]%%:*} + o="" + # user gets no pool limits if + # POOLS_QUOTA_USERS_SET does not specify it + [[ ${pq_userset[i]} =~ : ]] && o=${pq_userset[i]##$u:} + pq_limits[$u]+=" $o" + done + pq_users=(${!pq_limits[@]}) + + declare -a opts + local pool + + for ((i=0; i<${#pq_users[@]}; i++)); do + u=${pq_users[i]} + # set to max limit (_u64) + $LFS setquota -u $u -B $((2**24 - 1))T $DIR + opts=(${pq_limits[$u]}) + for ((j=0; j<${#opts[@]}; j++)); do + p=${opts[j]##*:} + o=${opts[j]%%:*} + # Set limit for all existing pools if + # no pool specified + if [ $p == $o ]; then + p=$(list_pool $FSNAME | sed "s/$FSNAME.//") + echo "No pool specified for $u, + set limit $o for all existing pools" + fi + for pool in $p; do + $LFS setquota -u $u -B $o --pool $pool $DIR || + error "setquota -u $u -B $o \ + --pool $pool failed" + done + done + $LFS quota -uv $u --pool $DIR + done } -check_and_setup_lustre() { +do_check_and_setup_lustre() { + # If auster does not want us to setup, then don't. + ! ${do_setup} && return + + echo "=== $TESTSUITE: start setup $(date +'%H:%M:%S (%s)') ===" + sanitize_parameters nfs_client_mode && return cifs_client_mode && return @@ -5461,36 +6118,35 @@ check_and_setup_lustre() { is_mounted $MOUNT || error "NAME=$NAME not mounted" export I_MOUNTED=yes do_check=false - # 2. - # MOUNT2 is mounted - elif is_mounted $MOUNT2; then - # 3. - # MOUNT2 is mounted, while MOUNT_2 is not set - if ! [ "$MOUNT_2" ]; then - cleanup_mount $MOUNT2 - export I_UMOUNTED2=yes - - # 4. - # MOUNT2 is mounted, MOUNT_2 is set - else - # FIXME: what to do if check_config failed? - # i.e. if: - # 1) remote client has mounted other Lustre fs ? - # 2) it has insane env ? - # let's try umount MOUNT2 on all clients and mount it again: - if ! check_config_clients $MOUNT2; then - cleanup_mount $MOUNT2 - restore_mount $MOUNT2 - export I_MOUNTED2=yes - fi - fi - - # 5. - # MOUNT is mounted MOUNT2 is not mounted - elif [ "$MOUNT_2" ]; then - restore_mount $MOUNT2 - export I_MOUNTED2=yes - fi + # 2. + # MOUNT2 is mounted + elif is_mounted $MOUNT2; then + # 3. + # MOUNT2 is mounted, while MOUNT_2 is not set + if ! [ "$MOUNT_2" ]; then + cleanup_mount $MOUNT2 + export I_UMOUNTED2=yes + + # 4. + # MOUNT2 is mounted, MOUNT_2 is set + else + # FIXME: what to do if check_config failed? + # i.e. if: + # 1) remote client has mounted other Lustre fs ? + # 2) it has insane env ? + # try to umount MOUNT2 on all clients and mount again: + if ! check_config_clients $MOUNT2; then + cleanup_mount $MOUNT2 + restore_mount $MOUNT2 + export I_MOUNTED2=yes + fi + fi + # 5. + # MOUNT is mounted MOUNT2 is not mounted + elif [ "$MOUNT_2" ]; then + restore_mount $MOUNT2 + export I_MOUNTED2=yes + fi if $do_check; then # FIXME: what to do if check_config failed? @@ -5523,22 +6179,71 @@ check_and_setup_lustre() { if [ -n "$fs_STRIPEPARAMS" ]; then setstripe_getstripe $MOUNT $fs_STRIPEPARAMS fi + if $GSS_SK; then set_flavor_all null elif $GSS; then set_flavor_all $SEC fi - if [ "$ONLY" == "setup" ]; then + if $DELETE_OLD_POOLS; then + destroy_all_pools + fi + + if [[ -n "$FS_POOL" ]]; then + create_pools $FS_POOL $FS_POOL_NOSTS + fi + + if [[ -n "$POOLS_QUOTA_USERS_SET" ]]; then + set_pools_quota + fi + + # set tunable parameters passed to test environment + set_params_clients + set_params_mdts + set_params_osts + echo "=== $TESTSUITE: finish setup $(date +'%H:%M:%S (%s)') ===" + + if [[ "$ONLY" == "setup" ]]; then exit 0 fi } +check_and_setup_lustre() { + local start_stamp=$(date +%s) + local saved_umask=$(umask) + local log=$TESTLOG_PREFIX.test_setup.test_log.$(hostname -s).log + local status='PASS' + local stop_stamp=0 + local duration=0 + local error='' + local rc=0 + + umask 0022 + + log_sub_test_begin test_setup + + if ! do_check_and_setup_lustre 2>&1 > >(tee -i $log); then + error=$(tail -1 $log) + status='FAIL' + rc=1 + fi + + stop_stamp=$(date +%s) + duration=$((stop_stamp - start_stamp)) + + log_sub_test_end "$status" "$duration" "$rc" "$error" + + umask $saved_umask + + return $rc +} + restore_mount () { - local clients=${CLIENTS:-$HOSTNAME} - local mntpt=$1 + local clients=${CLIENTS:-$HOSTNAME} + local mntpt=$1 - zconf_mount_clients $clients $mntpt + zconf_mount_clients $clients $mntpt } cleanup_mount () { @@ -5549,59 +6254,16 @@ cleanup_mount () { } cleanup_and_setup_lustre() { - if [ "$ONLY" == "cleanup" -o "`mount | grep $MOUNT`" ]; then - lctl set_param debug=0 || true - cleanupall - if [ "$ONLY" == "cleanup" ]; then - exit 0 - fi - fi - check_and_setup_lustre -} - -# Get all of the server target devices from a given server node and type. -get_mnt_devs() { - local node=$1 - local type=$2 - local devs - local dev + if [[ "$ONLY" == "cleanup" ]] || grep -q "$MOUNT" /proc/mounts; then + lctl set_param debug=0 || true + cleanupall - if [ "$type" == ost ]; then - devs=$(get_osd_param $node "" mntdev) - else - devs=$(do_node $node $LCTL get_param -n osd-*.$FSNAME-M*.mntdev) + if [[ "$ONLY" == "cleanup" ]]; then + exit 0 + fi fi - for dev in $devs; do - case $dev in - *loop*) do_node $node "losetup $dev" | \ - sed -e "s/.*(//" -e "s/).*//" ;; - *) echo $dev ;; - esac - done -} - -# Get all of the server target devices. -get_svr_devs() { - local node - local i - - # Master MDS parameters used by lfsck - MDTNODE=$(facet_active_host $SINGLEMDS) - MDTDEV=$(echo $(get_mnt_devs $MDTNODE mdt) | awk '{print $1}') - - # MDT devices - i=0 - for node in $(mdts_nodes); do - MDTDEVS[i]=$(get_mnt_devs $node mdt) - i=$((i + 1)) - done - # OST devices - i=0 - for node in $(osts_nodes); do - OSTDEVS[i]=$(get_mnt_devs $node ost) - i=$((i + 1)) - done + do_check_and_setup_lustre } # Run e2fsck on MDT or OST device. @@ -5741,64 +6403,98 @@ log_zfs_info() { fi } -check_and_cleanup_lustre() { - if [ "$LFSCK_ALWAYS" = "yes" -a "$TESTSUITE" != "sanity-lfsck" -a \ - "$TESTSUITE" != "sanity-scrub" ]; then +do_check_and_cleanup_lustre() { + echo "=== $TESTSUITE: start cleanup $(date +'%H:%M:%S (%s)') ===" + + if [[ "$LFSCK_ALWAYS" == "yes" && "$TESTSUITE" != "sanity-lfsck" && \ + "$TESTSUITE" != "sanity-scrub" ]]; then run_lfsck fi if is_mounted $MOUNT; then if $DO_CLEANUP; then - [ -n "$DIR" ] && rm -rf $DIR/[Rdfs][0-9]* || + [[ -n "$DIR" ]] && rm -rf $DIR/[Rdfs][0-9]* || error "remove sub-test dirs failed" else echo "skip cleanup" fi - [ "$ENABLE_QUOTA" ] && restore_quota || true + [[ -n "$ENABLE_QUOTA" ]] && restore_quota || true fi - if [ "$I_UMOUNTED2" = "yes" ]; then + if [[ "$I_UMOUNTED2" == "yes" ]]; then restore_mount $MOUNT2 || error "restore $MOUNT2 failed" fi - if [ "$I_MOUNTED2" = "yes" ]; then + if [[ "$I_MOUNTED2" == "yes" ]]; then cleanup_mount $MOUNT2 fi - if [[ "$I_MOUNTED" = "yes" ]] && ! $AUSTER_CLEANUP; then + if [[ "$I_MOUNTED" == "yes" ]] && ! $AUSTER_CLEANUP; then cleanupall -f || error "cleanup failed" unset I_MOUNTED fi + + echo "=== $TESTSUITE: finish cleanup $(date +'%H:%M:%S (%s)') ===" +} + +check_and_cleanup_lustre() { + local start_stamp=$(date +%s) + local saved_umask=$(umask) + local log=$TESTLOG_PREFIX.test_cleanup.test_log.$(hostname -s).log + local status='PASS' + local stop_stamp=0 + local duration=0 + local error='' + local rc=0 + + umask 0022 + + log_sub_test_begin test_cleanup + + if ! do_check_and_cleanup_lustre 2>&1 > >(tee -i $log); then + error=$(tail -1 $log) + status='FAIL' + rc=1 + fi + + stop_stamp=$(date +%s) + duration=$((stop_stamp - start_stamp)) + + log_sub_test_end "$status" "$duration" "$rc" "$error" + + umask $saved_umask + + return $rc } ####### # General functions wait_for_function () { - local quiet="" - - # suppress fn both stderr and stdout - if [ "$1" = "--quiet" ]; then - shift - quiet=" > /dev/null 2>&1" + local quiet="" - fi + # suppress fn both stderr and stdout + if [ "$1" = "--quiet" ]; then + shift + quiet=" > /dev/null 2>&1" + fi - local fn=$1 - local max=${2:-900} - local sleep=${3:-5} + local fn=$1 + local max=${2:-900} + local sleep=${3:-5} - local wait=0 + local wait=0 - while true; do + while true; do - eval $fn $quiet && return 0 + eval $fn $quiet && return 0 - wait=$((wait + sleep)) - [ $wait -lt $max ] || return 1 - echo waiting $fn, $((max - wait)) secs left ... - sleep $sleep - done + [ $wait -lt $max ] || return 1 + echo waiting $fn, $((max - wait)) secs left ... + wait=$((wait + sleep)) + [ $wait -gt $max ] && ((sleep -= wait - max)) + sleep $sleep + done } check_network() { @@ -5815,8 +6511,8 @@ check_network() { } no_dsh() { - shift - eval $@ + shift + eval "$@" } # Convert a space-delimited list to a comma-delimited list. If the input is @@ -5828,48 +6524,48 @@ comma_list() { } list_member () { - local list=$1 - local item=$2 - echo $list | grep -qw $item + local list=$1 + local item=$2 + echo $list | grep -qw $item } # list, excluded are the comma separated lists exclude_items_from_list () { - local list=$1 - local excluded=$2 - local item + local list=$1 + local excluded=$2 + local item - list=${list//,/ } - for item in ${excluded//,/ }; do - list=$(echo " $list " | sed -re "s/\s+$item\s+/ /g") - done - echo $(comma_list $list) + list=${list//,/ } + for item in ${excluded//,/ }; do + list=$(echo " $list " | sed -re "s/\s+$item\s+/ /g") + done + echo $(comma_list $list) } # list, expand are the comma separated lists expand_list () { - local list=${1//,/ } - local expand=${2//,/ } - local expanded= + local list=${1//,/ } + local expand=${2//,/ } + local expanded= - expanded=$(for i in $list $expand; do echo $i; done | sort -u) - echo $(comma_list $expanded) + expanded=$(for i in $list $expand; do echo $i; done | sort -u) + echo $(comma_list $expanded) } testslist_filter () { - local script=$LUSTRE/tests/${TESTSUITE}.sh + local script=$LUSTRE/tests/${TESTSUITE}.sh - [ -f $script ] || return 0 + [ -f $script ] || return 0 - local start_at=$START_AT - local stop_at=$STOP_AT + local start_at=$START_AT + local stop_at=$STOP_AT - local var=${TESTSUITE//-/_}_START_AT - [ x"${!var}" != x ] && start_at=${!var} - var=${TESTSUITE//-/_}_STOP_AT - [ x"${!var}" != x ] && stop_at=${!var} + local var=${TESTSUITE//-/_}_START_AT + [ x"${!var}" != x ] && start_at=${!var} + var=${TESTSUITE//-/_}_STOP_AT + [ x"${!var}" != x ] && stop_at=${!var} - sed -n 's/^test_\([^ (]*\).*/\1/p' $script | \ + sed -n 's/^test_\([^ (]*\).*/\1/p' $script | awk ' BEGIN { if ("'${start_at:-0}'" != 0) flag = 1 } /^'${start_at}'$/ {flag = 0} {if (flag == 1) print $0} @@ -5877,78 +6573,99 @@ testslist_filter () { } absolute_path() { - (cd `dirname $1`; echo $PWD/`basename $1`) + (cd `dirname $1`; echo $PWD/`basename $1`) } get_facets () { - local types=${1:-"OST MDS MGS"} - - local list="" - - for entry in $types; do - local name=$(echo $entry | tr "[:upper:]" "[:lower:]") - local type=$(echo $entry | tr "[:lower:]" "[:upper:]") - - case $type in - MGS ) list="$list $name";; - MDS|OST|AGT ) local count=${type}COUNT - for ((i=1; i<=${!count}; i++)) do - list="$list ${name}$i" - done;; - * ) error "Invalid facet type" - exit 1;; - esac - done - echo $(comma_list $list) + local types=${1:-"OST MDS MGS"} + + local list="" + + for entry in $types; do + local name=$(echo $entry | tr "[:upper:]" "[:lower:]") + local type=$(echo $entry | tr "[:lower:]" "[:upper:]") + + case $type in + MGS ) list="$list $name";; + MDS|OST|AGT ) local count=${type}COUNT + for ((i=1; i<=${!count}; i++)) do + list="$list ${name}$i" + done;; + * ) error "Invalid facet type" + exit 1;; + esac + done + echo $(comma_list $list) } ################################## # Adaptive Timeouts funcs at_is_enabled() { - # only check mds, we assume at_max is the same on all nodes - local at_max=$(do_facet $SINGLEMDS "lctl get_param -n at_max") - if [ $at_max -eq 0 ]; then - return 1 - else - return 0 - fi + # only check mds, we assume at_max is the same on all nodes + local at_max=$(do_facet $SINGLEMDS "lctl get_param -n at_max") + + if [ $at_max -eq 0 ]; then + return 1 + else + return 0 + fi } at_get() { - local facet=$1 - local at=$2 + local facet=$1 + local at=$2 - # suppose that all ost-s have the same $at value set - [ $facet != "ost" ] || facet=ost1 + # suppose that all ost-s have the same $at value set + [ $facet != "ost" ] || facet=ost1 - do_facet $facet "lctl get_param -n $at" + do_facet $facet "lctl get_param -n $at" } at_max_get() { - at_get $1 at_max + at_get $1 at_max +} + +at_max_set() { + local at_max=$1 + shift + + local facet + local hosts + + for facet in "$@"; do + if [ $facet == "ost" ]; then + facet=$(get_facets OST) + elif [ $facet == "mds" ]; then + facet=$(get_facets MDS) + fi + hosts=$(expand_list $hosts $(facets_hosts $facet)) + done + + do_nodes $hosts lctl set_param at_max=$at_max } at_min_get() { at_get $1 at_min } -at_max_set() { - local at_max=$1 - shift +at_min_set() { + local at_min=$1 + shift + + local facet + local hosts - local facet - local hosts - for facet in $@; do - if [ $facet == "ost" ]; then - facet=$(get_facets OST) - elif [ $facet == "mds" ]; then - facet=$(get_facets MDS) - fi - hosts=$(expand_list $hosts $(facets_hosts $facet)) - done + for facet in "$@"; do + if [ $facet == "ost" ]; then + facet=$(get_facets OST) + elif [ $facet == "mds" ]; then + facet=$(get_facets MDS) + fi + hosts=$(expand_list $hosts $(facets_hosts $facet)) + done - do_nodes $hosts lctl set_param at_max=$at_max + do_nodes $hosts lctl set_param at_min=$at_min } ################################## @@ -5956,11 +6673,11 @@ at_max_set() { drop_request() { # OBD_FAIL_MDS_ALL_REQUEST_NET - RC=0 - do_facet $SINGLEMDS lctl set_param fail_val=0 fail_loc=0x123 - do_facet client "$1" || RC=$? - do_facet $SINGLEMDS lctl set_param fail_loc=0 - return $RC + RC=0 + do_facet $SINGLEMDS lctl set_param fail_val=0 fail_loc=0x123 + do_facet client "$1" || RC=$? + do_facet $SINGLEMDS lctl set_param fail_loc=0 + return $RC } drop_reply() { @@ -6044,34 +6761,36 @@ drop_bl_callback() { drop_mdt_ldlm_reply() { #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157 - RC=0 - local list=$(comma_list $(mdts_nodes)) - do_nodes $list lctl set_param fail_loc=0x157 + RC=0 + local list=$(comma_list $(mdts_nodes)) - do_facet client "$@" || RC=$? + do_nodes $list lctl set_param fail_loc=0x157 + + do_facet client "$@" || RC=$? - do_nodes $list lctl set_param fail_loc=0 - return $RC + do_nodes $list lctl set_param fail_loc=0 + return $RC } drop_mdt_ldlm_reply_once() { #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157 - RC=0 - local list=$(comma_list $(mdts_nodes)) - do_nodes $list lctl set_param fail_loc=0x80000157 + RC=0 + local list=$(comma_list $(mdts_nodes)) - do_facet client "$@" || RC=$? + do_nodes $list lctl set_param fail_loc=0x80000157 - do_nodes $list lctl set_param fail_loc=0 - return $RC + do_facet client "$@" || RC=$? + + do_nodes $list lctl set_param fail_loc=0 + return $RC } clear_failloc() { - facet=$1 - pause=$2 - sleep $pause - echo "clearing fail_loc on $facet" - do_facet $facet "lctl set_param fail_loc=0 2>/dev/null || true" + facet=$1 + pause=$2 + sleep $pause + echo "clearing fail_loc on $facet" + do_facet $facet "lctl set_param fail_loc=0 2>/dev/null || true" } set_nodes_failloc () { @@ -6079,10 +6798,28 @@ set_nodes_failloc () { do_nodes $(comma_list $1) lctl set_param fail_val=$fv fail_loc=$2 } +# Print the total of the lock_unused_count across all namespaces containing the +# given wildcard. If the namespace wildcard is omitted, all namespaces will be +# matched. +# Usage: total_unused_locks [namespace_wildcard] +total_unused_locks() { + $LCTL get_param -n "ldlm.namespaces.*$1*.lock_unused_count" | calc_sum +} + +# Print the total of the lock_count across all namespaces containing the given +# wildcard. If the namespace wilcard is omitted, all namespaces will be matched. +# Usage: total_used_locks [namespace_wildcard] +total_used_locks() { + $LCTL get_param -n "ldlm.namespaces.*$1*.lock_count" | calc_sum +} + +# Cancel lru locks across all namespaces containing the given wildcard. If the +# wilcard is omitted, lru locks will be canceled across all namespaces. +# Usage: cancel_lru_locks [namespace_wildcard] cancel_lru_locks() { #$LCTL mark "cancel_lru_locks $1 start" - $LCTL set_param -n ldlm.namespaces.*$1*.lru_size=clear - $LCTL get_param ldlm.namespaces.*$1*.lock_unused_count | grep -v '=0' + $LCTL set_param -t4 -n "ldlm.namespaces.*$1*.lru_size=clear" + $LCTL get_param "ldlm.namespaces.*$1*.lock_unused_count" | grep -v '=0' #$LCTL mark "cancel_lru_locks $1 stop" } @@ -6095,7 +6832,7 @@ default_lru_size() lru_resize_enable() { - lctl set_param ldlm.namespaces.*$1*.lru_size=0 + lctl set_param ldlm.namespaces.*$1*.lru_size=0 } lru_resize_disable() @@ -6116,15 +6853,16 @@ flock_is_enabled() } pgcache_empty() { - local FILE - for FILE in `lctl get_param -N "llite.*.dump_page_cache"`; do - if [ `lctl get_param -n $FILE | wc -l` -gt 1 ]; then - echo there is still data in page cache $FILE ? - lctl get_param -n $FILE - return 1 - fi - done - return 0 + local FILE + + for FILE in `lctl get_param -N "llite.*.dump_page_cache"`; do + if [ `lctl get_param -n $FILE | wc -l` -gt 1 ]; then + echo there is still data in page cache $FILE ? + lctl get_param -n $FILE + return 1 + fi + done + return 0 } debugsave() { @@ -6146,13 +6884,13 @@ debugrestore() { } debug_size_save() { - DEBUG_SIZE_SAVED="$(lctl get_param -n debug_mb)" + DEBUG_SIZE_SAVED="$(lctl get_param -n debug_mb)" } debug_size_restore() { - [ -n "$DEBUG_SIZE_SAVED" ] && \ - do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug_mb=$DEBUG_SIZE_SAVED" - DEBUG_SIZE_SAVED="" + [ -n "$DEBUG_SIZE_SAVED" ] && + do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug_mb=$DEBUG_SIZE_SAVED" + DEBUG_SIZE_SAVED="" } start_full_debug_logging() { @@ -6193,7 +6931,7 @@ report_error() { dump=false fi - log " ${TESTSUITE} ${TESTNAME}: @@@@@@ ${TYPE}: $@ " + log " ${TESTSUITE} ${TESTNAME}: @@@@@@ ${TYPE}: $* " (print_stack_trace 2) >&2 mkdir -p $LOGDIR # We need to dump the logs on all nodes @@ -6203,7 +6941,7 @@ report_error() { debugrestore [ "$TESTSUITELOG" ] && - echo "$TESTSUITE: $TYPE: $TESTNAME $@" >> $TESTSUITELOG + echo "$TESTSUITE: $TYPE: $TESTNAME $*" >> $TESTSUITELOG if [ -z "$*" ]; then echo "error() without useful message, please fix" > $LOGDIR/err else @@ -6314,12 +7052,12 @@ error_not_in_vm() { # environment is not configured properly". # skip_env () { - $FAIL_ON_SKIP_ENV && error false $@ || skip $@ + $FAIL_ON_SKIP_ENV && error false "$@" || skip "$@" } skip_noexit() { echo - log " SKIP: $TESTSUITE $TESTNAME $@" + log " SKIP: $TESTSUITE $TESTNAME $*" if [[ -n "$ALWAYS_SKIPPED" ]]; then skip_logged $TESTNAME "$@" @@ -6329,21 +7067,54 @@ skip_noexit() { fi [[ -n "$TESTSUITELOG" ]] && - echo "$TESTSUITE: SKIP: $TESTNAME $@" >> $TESTSUITELOG || true + echo "$TESTSUITE: SKIP: $TESTNAME $*" >> $TESTSUITELOG || true unset TESTNAME } skip() { - skip_noexit $@ + skip_noexit "$@" exit 0 } +# +# For interop testing treate EOPNOTSUPP as success +# and skip +# +skip_eopnotsupp() { + local retstr=$@ + + echo $retstr | awk -F'|' '{print $1}' | + grep -E unsupported\|"(Operation not supported)" + (( $? == 0 )) || error "$retstr" + skip $retstr +} + +# Add a list of tests to ALWAYS_EXCEPT due to an issue. +# Usage: always_except LU-4815 23 42q ... +# +function always_except() { + local issue="${1:-}" # single jira style issue ("LU-4815") + local test_num + + shift + + if ! [[ "$issue" =~ ^[[:upper:]]+-[[:digit:]]+$ ]]; then + error "always_except: invalid issue '$issue' for tests '$*'" + fi + + for test_num in "$@"; do + ALWAYS_EXCEPT+=" $test_num" + done +} + build_test_filter() { EXCEPT="$EXCEPT $(testslist_filter)" - for O in $ONLY; do - if [[ $O = [0-9]*-[0-9]* ]]; then - for num in $(seq $(echo $O | tr '-' ' ')); do + # allow test numbers separated by '+', or ',', in addition to ' ' + # to avoid issues with multiple arguments handling by shell/autotest + for O in ${ONLY//[+,]/ }; do + if [[ $O =~ [0-9]*-[0-9]* ]]; then + for ((num=${O%-[0-9]*}; num <= ${O#[0-9]*-}; num++)); do eval ONLY_$num=true done else @@ -6351,30 +7122,30 @@ build_test_filter() { fi done - [ "$EXCEPT$ALWAYS_EXCEPT" ] && - log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`" - [ "$EXCEPT_SLOW" ] && - log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`" - for E in $EXCEPT; do + [[ -z "$EXCEPT$ALWAYS_EXCEPT" ]] || + log "excepting tests: $(echo $EXCEPT $ALWAYS_EXCEPT)" + [[ -z "$EXCEPT_SLOW" ]] || + log "skipping tests SLOW=no: $(echo $EXCEPT_SLOW)" + for E in ${EXCEPT//[+,]/ }; do eval EXCEPT_${E}=true done - for E in $ALWAYS_EXCEPT; do + for E in ${ALWAYS_EXCEPT//[+,]/ }; do eval EXCEPT_ALWAYS_${E}=true done - for E in $EXCEPT_SLOW; do + for E in ${EXCEPT_SLOW//[+,]/ }; do eval EXCEPT_SLOW_${E}=true done - for G in $GRANT_CHECK_LIST; do + for G in ${GRANT_CHECK_LIST//[+,]/ }; do eval GCHECK_ONLY_${G}=true done } basetest() { - if [[ $1 = [a-z]* ]]; then - echo $1 - else - echo ${1%%[a-zA-Z]*} - fi + if [[ $1 = [a-z]* ]]; then + echo $1 + else + echo ${1%%[a-zA-Z]*} + fi } # print a newline if the last test was skipped @@ -6439,7 +7210,8 @@ run_test() { local isonly_base=ONLY_$base if [[ ${!isonly}x != x || ${!isonly_base}x != x ]]; then - if [[ -n "$ALWAYS_SKIPPED" && -n "$HONOR_EXCEPT" ]]; then + if [[ -n "$ALWAYS_SKIPPED" && + -n "$HONOR_EXCEPT" ]]; then LAST_SKIPPED="y" skip_noexit "$skip_message" return 0 @@ -6471,33 +7243,34 @@ log() { echo "$*" >&2 load_module ../libcfs/libcfs/libcfs - local MSG="$*" - # Get rid of ' - MSG=${MSG//\'/\\\'} - MSG=${MSG//\(/\\\(} - MSG=${MSG//\)/\\\)} - MSG=${MSG//\;/\\\;} - MSG=${MSG//\|/\\\|} - MSG=${MSG//\>/\\\>} - MSG=${MSG//\ /dev/null || true + local MSG="$*" + # Get rid of ' + MSG=${MSG//\'/\\\'} + MSG=${MSG//\*/\\\*} + MSG=${MSG//\(/\\\(} + MSG=${MSG//\)/\\\)} + MSG=${MSG//\;/\\\;} + MSG=${MSG//\|/\\\|} + MSG=${MSG//\>/\\\>} + MSG=${MSG//\ /dev/null || true } trace() { - log "STARTING: $*" - strace -o $TMP/$1.strace -ttt $* - RC=$? - log "FINISHED: $*: rc $RC" - return 1 + log "STARTING: $*" + strace -o $TMP/$1.strace -ttt $* + RC=$? + log "FINISHED: $*: rc $RC" + return 1 } -complete () { - local duration=$1 +complete_test() { + local duration=$1 - banner test complete, duration $duration sec - [ -f "$TESTSUITELOG" ] && egrep .FAIL $TESTSUITELOG || true - echo duration $duration >>$TESTSUITELOG + banner "test complete, duration $duration sec" + [ -f "$TESTSUITELOG" ] && egrep .FAIL $TESTSUITELOG || true + echo "duration $duration" >>$TESTSUITELOG } pass() { @@ -6509,23 +7282,24 @@ pass() { elif [[ -f $LOGDIR/skip ]]; then TEST_STATUS="SKIP" fi - echo "$TEST_STATUS $@" 2>&1 | tee -a $TESTSUITELOG + echo "$TEST_STATUS $*" 2>&1 | tee -a $TESTSUITELOG } check_mds() { - local FFREE=$(do_node $SINGLEMDS \ + local FFREE=$(do_node $SINGLEMDS \ lctl get_param -n osd*.*MDT*.filesfree | calc_sum) - local FTOTAL=$(do_node $SINGLEMDS \ + local FTOTAL=$(do_node $SINGLEMDS \ lctl get_param -n osd*.*MDT*.filestotal | calc_sum) - [ $FFREE -ge $FTOTAL ] && error "files free $FFREE > total $FTOTAL" || true + [ $FFREE -ge $FTOTAL ] && error "files free $FFREE > total $FTOTAL" || + true } reset_fail_loc () { - echo -n "Resetting fail_loc on all nodes..." - do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \ - fail_val=0 2>/dev/null" || true - echo done. + #echo -n "Resetting fail_loc on all nodes..." + do_nodes --quiet $(comma_list $(nodes_list)) \ + "lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true + #echo done. } @@ -6534,14 +7308,15 @@ reset_fail_loc () { # Also appends a timestamp and prepends the testsuite name. # -EQUALS="====================================================================================================" +# ======================================================== 15:06:12 (1624050372) +EQUALS="========================================================" banner() { - msg="== ${TESTSUITE} $*" - last=${msg: -1:1} - [[ $last != "=" && $last != " " ]] && msg="$msg " - msg=$(printf '%s%.*s' "$msg" $((${#EQUALS} - ${#msg})) $EQUALS ) - # always include at least == after the message - log "$msg== $(date +"%H:%M:%S (%s)")" + msg="== ${TESTSUITE} $*" + last=${msg: -1:1} + [[ $last != "=" && $last != " " ]] && msg="$msg " + msg=$(printf '%s%.*s' "$msg" $((${#EQUALS} - ${#msg})) $EQUALS ) + # always include at least == after the message + log "$msg== $(date +"%H:%M:%S (%s)")" } check_dmesg_for_errors() { @@ -6550,7 +7325,7 @@ check_dmesg_for_errors() { ldiskfs_check_descriptors: Checksum for group 0 failed\|\ group descriptors corrupted" - res=$(do_nodes $(comma_list $(nodes_list)) "dmesg" | grep "$errors") + res=$(do_nodes -q $(comma_list $(nodes_list)) "dmesg" | grep "$errors") [ -z "$res" ] && return 0 echo "Kernel error detected: $res" return 1 @@ -6604,21 +7379,34 @@ run_one_logged() { local zfs_debug_log=$TESTLOG_PREFIX.$TESTNAME.zfs_log local SAVE_UMASK=$(umask) local rc=0 + local node + declare -A kptr_restrict + declare -A debug_raw umask 0022 + for node in $(all_nodes); do + kptr_restrict[$node]=$(do_node $node "sysctl --values kernel/kptr_restrict") + do_node $node "sysctl -wq kernel/kptr_restrict=1" + # Enable %p to be unhashed (if supported) + debug_raw[$node]=$(do_node $node "$LCTL get_param -n debug_raw_pointers" || echo 0) + do_node $node "$LCTL set_param debug_raw_pointers=Y || true" + done + rm -f $LOGDIR/err $LOGDIR/ignore $LOGDIR/skip echo - # if ${ONLY_$testnum} set, repeat $ONLY_REPEAT times, otherwise once - local isonly=ONLY_$testnum - local repeat=${!isonly:+$ONLY_REPEAT} + # if $ONLY is set, repeat subtest $ONLY_REPEAT times, otherwise once + local repeat=${ONLY:+$ONLY_REPEAT} - for testiter in $(seq ${repeat:-1}); do + for ((testiter=0; testiter < ${repeat:-1}; testiter++)); do local before_sub=$SECONDS - log_sub_test_begin $TESTNAME + log_sub_test_begin $TESTNAME # remove temp files between repetitions to avoid test failures - [ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] && - rm -rvf $DIR/$tdir* $DIR/$tfile* + if [[ -n "$append" ]]; then + [[ -n "$tdir" ]] && rm -rvf $DIR/$tdir* + [[ -n "$tfile" ]] && rm -vf $DIR/$tfile* + echo "subtest iteration $testiter/$repeat" + fi # loop around subshell so stack_trap EXIT triggers each time (run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log rc=${PIPESTATUS[0]} @@ -6642,8 +7430,21 @@ run_one_logged() { fi pass "$testnum" "(${duration_sub}s)" + if [ -n "${DUMP_OK}" ]; then + gather_logs $(comma_list $(nodes_list)) + fi + log_sub_test_end $TEST_STATUS $duration_sub "$rc" "$test_error" - [[ $rc != 0 ]] && break + [[ $rc != 0 || "$TEST_STATUS" != "PASS" ]] && break + done + + local param + for node in $(all_nodes); do + param="kernel/kptr_restrict=${kptr_restrict[$node]}" + do_node $node "sysctl -wq ${param} || true" + # Restore %p to initial state + param="debug_raw_pointers=${debug_raw[$node]}" + do_node $node "$LCTL set_param ${param} || true" done if [[ "$TEST_STATUS" != "SKIP" && -f $TF_SKIP ]]; then @@ -6673,10 +7474,6 @@ skip_logged(){ log_sub_test_end "SKIP" "0" "0" "$@" } -canonical_path() { - (cd $(dirname $1); echo $PWD/$(basename $1)) -} - grant_from_clients() { local nodes="$1" @@ -6715,7 +7512,7 @@ check_grant() { # sync all the data and make sure no pending data on server do_nodes $clients sync - clients_up # initiate all idling connections + do_nodes $clients $LFS df # initiate all idling connections # get client grant cli_grant=$(grant_from_clients $clients) @@ -6746,44 +7543,50 @@ check_grant() { ######################## # helper functions -osc_to_ost() -{ - osc=$1 - ost=`echo $1 | awk -F_ '{print $3}'` - if [ -z $ost ]; then - ost=`echo $1 | sed 's/-osc.*//'` - fi - echo $ost +osc_to_ost() { + local osc=$1 + + echo ${osc/-osc*/} } -ostuuid_from_index() -{ - $LFS osts $2 | sed -ne "/^$1: /s/.* \(.*\) .*$/\1/p" +ostuuid_from_index() { + # only print the first UUID, if 'lfs osts' shows multiple mountpoints + local uuid=($($LFS osts $2 | sed -ne "/^$1: /s/.* \(.*\) .*$/\1/p")) + + echo ${uuid} } ostname_from_index() { - local uuid=$(ostuuid_from_index $1) - echo ${uuid/_UUID/} + local uuid=$(ostuuid_from_index $1 $2) + + echo ${uuid/_UUID/} +} + +mdtuuid_from_index() { + # only print the first UUID, if 'lfs osts' shows multiple mountpoints + local uuid=($($LFS mdts $2 | sed -ne "/^$1: /s/.* \(.*\) .*$/\1/p")) + + echo ${uuid} } mdtname_from_index() { - local uuid=$(mdtuuid_from_index $1) + local uuid=$(mdtuuid_from_index $1 $2) + echo ${uuid/_UUID/} } -mdssize_from_index () { +mdssize_from_index() { local mdt=$(mdtname_from_index $2) - $LFS df $1 | grep $mdt | awk '{ print $2 }' + + $LFS df $1 | awk "/$mdt/ { print \$2 }" } index_from_ostuuid() { - $LFS osts $2 | sed -ne "/${1}/s/\(.*\): .* .*$/\1/p" -} + # only print the first index, if 'lfs osts' shows multiple mountpoints + local ostidx=($($LFS osts $2 | sed -ne "/${1}/s/\(.*\): .* .*$/\1/p")) -mdtuuid_from_index() -{ - $LFS mdts $2 | sed -ne "/^$1: /s/.* \(.*\) .*$/\1/p" + echo ${ostidx} } # Description: @@ -6799,6 +7602,41 @@ local_addr_list() { ip addr | awk '/inet / {print $2}' | awk -F/ '{print $1}' } +# Description: +# Returns list of interfaces configured for LNet +lnet_if_list() { + local nids=( $($LCTL list_nids | xargs echo) ) + + [[ -z ${nids[@]} ]] && + return 0 + + if [[ ${NETTYPE} =~ kfi* ]]; then + $LNETCTL net show 2>/dev/null | awk '/ cxi[0-9]+$/{print $NF}' | + sort -u | xargs echo + return 0 + fi + + declare -a INTERFACES + + for ((i = 0; i < ${#nids[@]}; i++)); do + ip=$(sed 's/^\(.*\)@.*$/\1/'<<<${nids[i]}) + INTERFACES[i]=$(ip -o a s | + awk '$4 ~ /^'$ip'\//{print $2}') + INTERFACES=($(echo "${INTERFACES[@]}" | tr ' ' '\n' | uniq | tr '\n' ' ')) + if [[ -z ${INTERFACES[i]} ]]; then + error "Can't determine interface name for NID ${nids[i]}" + elif [[ 1 -ne $(wc -w <<<${INTERFACES[i]}) ]]; then + error "Found $(wc -w <<<${INTERFACES[i]}) interfaces for NID ${nids[i]}. Expect 1" + fi + done + + echo "${INTERFACES[@]}" + + return 0 +} + +# return 1 if addr is remote +# return 0 if addr is local is_local_addr() { local addr=$1 # Cache address list to avoid mutiple execution of local_addr_list @@ -6810,30 +7648,34 @@ is_local_addr() { return 1 } +# return true(0) if host_name is local +# return false(1) if host_name is remote local_node() { local host_name=$1 local is_local="IS_LOCAL_$(host_id $host_name)" + if [ -z "${!is_local-}" ] ; then - eval $is_local=0 - local host_ip=$($LUSTRE/tests/resolveip $host_name) - is_local_addr "$host_ip" && eval $is_local=1 + eval $is_local=false + local host_ip=$(getent ahostsv4 $host_name | + awk 'NR == 1 { print $1 }') + is_local_addr "$host_ip" && eval $is_local=true fi - [[ "${!is_local}" == "1" ]] + ${!is_local} } remote_node () { local node=$1 - local_node $node && return 1 - return 0 + + ! local_node $node } remote_mds () { - local node - for node in $(mdts_nodes); do - remote_node $node && return 0 - done - return 1 + local node + for node in $(mdts_nodes); do + remote_node $node && return 0 + done + return 1 } remote_mds_nodsh() @@ -6851,11 +7693,11 @@ require_dsh_mds() remote_ost () { - local node - for node in $(osts_nodes) ; do - remote_node $node && return 0 - done - return 1 + local node + for node in $(osts_nodes) ; do + remote_node $node && return 0 + done + return 1 } remote_ost_nodsh() @@ -6866,9 +7708,9 @@ remote_ost_nodsh() require_dsh_ost() { - remote_ost_nodsh && echo "SKIP: $TESTSUITE: remote OST with nodsh" && \ - OSKIPPED=1 && return 1 - return 0 + remote_ost_nodsh && echo "SKIP: $TESTSUITE: remote OST with nodsh" && + OSKIPPED=1 && return 1 + return 0 } remote_mgs_nodsh() @@ -6881,12 +7723,12 @@ remote_mgs_nodsh() local_mode () { - remote_mds_nodsh || remote_ost_nodsh || \ - $(single_local_node $(comma_list $(nodes_list))) + remote_mds_nodsh || remote_ost_nodsh || + $(single_local_node $(comma_list $(nodes_list))) } remote_servers () { - remote_ost && remote_mds + remote_ost && remote_mds } # Get the active nodes for facets. @@ -6907,8 +7749,8 @@ facets_nodes () { # Get name of the active MGS node. mgs_node () { - echo -n $(facets_nodes $(get_facets MGS)) -} + echo -n $(facets_nodes $(get_facets MGS)) + } # Get all of the active MDS nodes. mdts_nodes () { @@ -6920,11 +7762,6 @@ osts_nodes () { echo -n $(facets_nodes $(get_facets OST)) } -# Get all of the active AGT (HSM agent) nodes. -agts_nodes () { - echo -n $(facets_nodes $(get_facets AGT)) -} - # Get all of the client nodes and active server nodes. nodes_list () { local nodes=$HOSTNAME @@ -7015,39 +7852,43 @@ all_nodes () { } init_clients_lists () { - # Sanity check: exclude the local client from RCLIENTS - local clients=$(hostlist_expand "$RCLIENTS") - local rclients=$(exclude_items_from_list "$clients" $HOSTNAME) + # Sanity check: exclude the local client from RCLIENTS + local clients=$(hostlist_expand "$RCLIENTS") + local rclients=$(exclude_items_from_list "$clients" $HOSTNAME) - # Sanity check: exclude the dup entries - RCLIENTS=$(for i in ${rclients//,/ }; do echo $i; done | sort -u) + # Sanity check: exclude the dup entries + RCLIENTS=$(for i in ${rclients//,/ }; do echo $i; done | sort -u) - clients="$SINGLECLIENT $HOSTNAME $RCLIENTS" + export CLIENT1=${CLIENT1:-$HOSTNAME} + export SINGLECLIENT=$CLIENT1 - # Sanity check: exclude the dup entries from CLIENTS - # for those configs which has SINGLCLIENT set to local client - clients=$(for i in $clients; do echo $i; done | sort -u) + clients="$SINGLECLIENT $HOSTNAME $RCLIENTS" - CLIENTS=$(comma_list $clients) - local -a remoteclients=($RCLIENTS) - for ((i=0; $i<${#remoteclients[@]}; i++)); do - varname=CLIENT$((i + 2)) - eval $varname=${remoteclients[i]} - done + # Sanity check: exclude the dup entries from CLIENTS + # for those configs which has SINGLCLIENT set to local client + clients=$(for i in $clients; do echo $i; done | sort -u) - CLIENTCOUNT=$((${#remoteclients[@]} + 1)) + export CLIENTS=$(comma_list $clients) + local -a remoteclients=($RCLIENTS) + for ((i=0; $i<${#remoteclients[@]}; i++)); do + varname=CLIENT$((i + 2)) + + eval export $varname=${remoteclients[i]} + done + + export CLIENTCOUNT=$((${#remoteclients[@]} + 1)) } get_random_entry () { - local rnodes=$1 + local rnodes=$1 - rnodes=${rnodes//,/ } + rnodes=${rnodes//,/ } - local -a nodes=($rnodes) - local num=${#nodes[@]} - local i=$((RANDOM * num * 2 / 65536)) + local -a nodes=($rnodes) + local num=${#nodes[@]} + local i=$((RANDOM * num * 2 / 65536)) - echo ${nodes[i]} + echo ${nodes[i]} } client_only () { @@ -7055,35 +7896,42 @@ client_only () { } check_versions () { - [ "$(lustre_version_code client)" = "$(lustre_version_code $SINGLEMDS)" -a \ - "$(lustre_version_code client)" = "$(lustre_version_code ost1)" ] + # this should already have been called, but just in case + [[ -n "$CLIENT_VERSION" && -n "$MDS1_VERSION" && -n "$OST1_VERSION" ]]|| + get_lustre_env + + echo "client=$CLIENT_VERSION MDS=$MDS1_VERSION OSS=$OST1_VERSION" + + [[ -n "$CLIENT_VERSION" && -n "$MDS1_VERSION" && -n "$OST1_VERSION" ]]|| + error "unable to determine node versions" + + (( "$CLIENT_VERSION" == "$MDS1_VERSION" && + "$CLIENT_VERSION" == "$OST1_VERSION")) } get_node_count() { - local nodes="$@" - echo $nodes | wc -w || true -} + local nodes="$@" -mixed_ost_devs () { - local nodes=$(osts_nodes) - local osscount=$(get_node_count "$nodes") - [ ! "$OSTCOUNT" = "$osscount" ] + echo $nodes | wc -w || true } mixed_mdt_devs () { - local nodes=$(mdts_nodes) - local mdtcount=$(get_node_count "$nodes") - [ ! "$MDSCOUNT" = "$mdtcount" ] + local nodes=$(mdts_nodes) + local mdtcount=$(get_node_count "$nodes") + + [ ! "$MDSCOUNT" = "$mdtcount" ] } generate_machine_file() { - local nodes=${1//,/ } - local machinefile=$2 - rm -f $machinefile - for node in $nodes; do - echo $node >>$machinefile || \ - { echo "can not generate machinefile $machinefile" && return 1; } - done + local nodes=${1//,/ } + local machinefile=$2 + + rm -f $machinefile + for node in $nodes; do + echo $node >>$machinefile || + { echo "can not generate machinefile $machinefile" && + return 1; } + done } get_stripe () { @@ -7094,23 +7942,6 @@ get_stripe () { rm -f $file } -setstripe_nfsserver () { - local dir=$1 - local nfsexportdir=$2 - shift - shift - - local -a nfsexport=($(awk '"'$dir'" ~ $2 && $3 ~ "nfs" && $2 != "/" \ - { print $1 }' /proc/mounts | cut -f 1 -d :)) - - # check that only one nfs mounted - [[ -z $nfsexport ]] && echo "$dir is not nfs mounted" && return 1 - (( ${#nfsexport[@]} == 1 )) || - error "several nfs mounts found for $dir: ${nfsexport[@]} !" - - do_nodev ${nfsexport[0]} lfs setstripe $nfsexportdir "$@" -} - # Check and add a test group. add_group() { local group_id=$1 @@ -7125,6 +7956,9 @@ add_group() { rc=1 } else + echo "adding group $group_name:$group_id" + getent group $group_name || true + getent group $group_id || true groupadd -g $group_id $group_name rc=${PIPESTATUS[0]} fi @@ -7194,8 +8028,7 @@ check_runas_id_ret() { mkdir $DIR/d0_runas_test chmod 0755 $DIR chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test - $myRUNAS -u $myRUNAS_UID -g $myRUNAS_GID touch $DIR/d0_runas_test/f$$ || - myRC=$? + $myRUNAS touch $DIR/d0_runas_test/f$$ || myRC=$? rm -rf $DIR/d0_runas_test return $myRC } @@ -7213,60 +8046,43 @@ check_runas_id() { # obtain the UID/GID for MPI_USER get_mpiuser_id() { - local mpi_user=$1 + local mpi_user=$1 - MPI_USER_UID=$(do_facet client "getent passwd $mpi_user | cut -d: -f3; + MPI_USER_UID=$(do_facet client "getent passwd $mpi_user | cut -d: -f3; exit \\\${PIPESTATUS[0]}") || error_exit "failed to get the UID for $mpi_user" - MPI_USER_GID=$(do_facet client "getent passwd $mpi_user | cut -d: -f4; + MPI_USER_GID=$(do_facet client "getent passwd $mpi_user | cut -d: -f4; exit \\\${PIPESTATUS[0]}") || error_exit "failed to get the GID for $mpi_user" } -# obtain and cache Kerberos ticket-granting ticket -refresh_krb5_tgt() { - local myRUNAS_UID=$1 - local myRUNAS_GID=$2 - shift 2 - local myRUNAS=$@ - if [ -z "$myRUNAS" ]; then - error_exit "myRUNAS command must be specified for refresh_krb5_tgt" - fi - - CLIENTS=${CLIENTS:-$HOSTNAME} - do_nodes $CLIENTS "set -x -if ! $myRUNAS krb5_login.sh; then - echo "Failed to refresh Krb5 TGT for UID/GID $myRUNAS_UID/$myRUNAS_GID." - exit 1 -fi" -} - # Run multiop in the background, but wait for it to print # "PAUSING" to its stdout before returning from this function. multiop_bg_pause() { - MULTIOP_PROG=${MULTIOP_PROG:-$MULTIOP} - FILE=$1 - ARGS=$2 + MULTIOP_PROG=${MULTIOP_PROG:-$MULTIOP} + FILE=$1 + ARGS=$2 - TMPPIPE=/tmp/multiop_open_wait_pipe.$$ - mkfifo $TMPPIPE + TMPPIPE=/tmp/multiop_open_wait_pipe.$$ + mkfifo $TMPPIPE - echo "$MULTIOP_PROG $FILE v$ARGS" - $MULTIOP_PROG $FILE v$ARGS > $TMPPIPE & + echo "$MULTIOP_PROG $FILE v$ARGS" + $MULTIOP_PROG $FILE v$ARGS > $TMPPIPE & + local pid=$! - echo "TMPPIPE=${TMPPIPE}" - read -t 60 multiop_output < $TMPPIPE - if [ $? -ne 0 ]; then - rm -f $TMPPIPE - return 1 - fi - rm -f $TMPPIPE - if [ "$multiop_output" != "PAUSING" ]; then - echo "Incorrect multiop output: $multiop_output" - kill -9 $PID - return 1 - fi + echo "TMPPIPE=${TMPPIPE}" + read -t 60 multiop_output < $TMPPIPE + if [ $? -ne 0 ]; then + rm -f $TMPPIPE + return 1 + fi + rm -f $TMPPIPE + if [ "$multiop_output" != "PAUSING" ]; then + echo "Incorrect multiop output: $multiop_output" + kill -9 $pid + return 1 + fi - return 0 + return 0 } do_and_time () { @@ -7285,17 +8101,30 @@ do_and_time () { inodes_available () { local IFree=$($LFS df -i $MOUNT | grep ^$FSNAME | awk '{ print $4 }' | sort -un | head -n1) || return 1 + echo $((IFree)) } mdsrate_inodes_available () { local min_inodes=$(inodes_available) + echo $((min_inodes * 99 / 100)) } +bytes_available () { + echo $(df -P -B 1 "$MOUNT" | awk 'END {print $4}') +} + +mdsrate_bytes_available () { + local bytes=$(bytes_available) + + echo $((bytes * 99 / 100)) +} + # reset stat counters clear_stats() { local paramfile="$1" + lctl set_param -n $paramfile=0 } @@ -7303,6 +8132,7 @@ clear_stats() { calc_stats() { local paramfile="$1" local stat="$2" + lctl get_param -n $paramfile | awk '/^'$stat'/ { sum += $2 } END { printf("%0.0f", sum) }' } @@ -7316,6 +8146,29 @@ calc_osc_kbytes () { $LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum } +free_min_max () { + wait_delete_completed + AVAIL=($(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail)) + echo "OST kbytes available: ${AVAIL[*]}" + MAXV=${AVAIL[0]} + MAXI=0 + MINV=${AVAIL[0]} + MINI=0 + for ((i = 0; i < ${#AVAIL[@]}; i++)); do + #echo OST $i: ${AVAIL[i]}kb + if [[ ${AVAIL[i]} -gt $MAXV ]]; then + MAXV=${AVAIL[i]} + MAXI=$i + fi + if [[ ${AVAIL[i]} -lt $MINV ]]; then + MINV=${AVAIL[i]} + MINI=$i + fi + done + echo "Min free space: OST $MINI: $MINV" + echo "Max free space: OST $MAXI: $MAXV" +} + # save_lustre_params(comma separated facet list, parameter_mask) # generate a stream of formatted strings ( =) save_lustre_params() { @@ -7348,15 +8201,18 @@ restore_lustre_params() { check_node_health() { local nodes=${1:-$(comma_list $(nodes_list))} - - for node in ${nodes//,/ }; do - check_network "$node" 5 - if [ $? -eq 0 ]; then - do_node $node "$LCTL get_param catastrophe 2>&1" | - grep -q "catastrophe=1" && - error "$node:LBUG/LASSERT detected" || true - fi - done + local health=$TMP/node_health.$$ + + do_nodes -q $nodes "$LCTL get_param catastrophe 2>&1" | tee $health | + grep "catastrophe=1" && error "LBUG/LASSERT detected" + # Only check/report network health if get_param isn't reported, since + # *clearly* the network is working if get_param returned something. + if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then + for node in ${nodes//,/ }; do + check_network $node 5 + done + fi + rm -f $health } mdsrate_cleanup () { @@ -7367,9 +8223,139 @@ mdsrate_cleanup () { fi } -delayed_recovery_enabled () { - local var=${SINGLEMDS}_svc - do_facet $SINGLEMDS lctl get_param -n mdd.${!var}.stale_export_age > /dev/null 2>&1 +run_mdtest () { + local test_type="$1" + local file_size=0 + local num_files=0 + local num_cores=0 + local num_procs=0 + local num_hosts=0 + local free_space=0 + local num_inodes=0 + local num_entries=0 + local num_dirs=0 + local np=0 + local rc=0 + + local mdtest_basedir + local mdtest_actions + local mdtest_options + local stripe_options + local params_file + + case "$test_type" in + create-small) + stripe_options=(-c 1 -i 0) + mdtest_actions=(-F -R) + file_size=1024 + num_files=100000 + ;; + create-large) + stripe_options=(-c -1) + mdtest_actions=(-F -R) + file_size=$((1024 * 1024 * 1024)) + num_files=16 + ;; + lookup-single) + stripe_options=(-c 1) + mdtest_actions=(-C -D -E -k -r) + num_dirs=1 + num_files=100000 + ;; + lookup-multi) + stripe_options=(-c 1) + mdtest_actions=(-C -D -E -k -r) + num_dirs=100 + num_files=1000 + ;; + *) + stripe_options=(-c -1) + mdtest_actions=() + num_files=100000 + ;; + esac + + if [[ -n "$MDTEST_DEBUG" ]]; then + mdtest_options+=(-v -v -v) + fi + + num_dirs=${NUM_DIRS:-$num_dirs} + num_files=${NUM_FILES:-$num_files} + file_size=${FILE_SIZE:-$file_size} + free_space=$(mdsrate_bytes_available) + + if (( file_size * num_files > free_space )); then + file_size=$((free_space / num_files)) + log "change file size to $file_size due to" \ + "number of files $num_files and" \ + "free space limit in $free_space" + fi + + if (( file_size > 0 )); then + log "set file size to $file_size" + mdtest_options+=(-w=$file_size) + fi + + params_file=$TMP/$TESTSUITE-$TESTNAME.parameters + mdtest_basedir=$MOUNT/mdtest + mdtest_options+=(-d=$mdtest_basedir) + + num_cores=$(nproc) + num_hosts=$(get_node_count ${CLIENTS//,/ }) + num_procs=$((num_cores * num_hosts)) + num_inodes=$(mdsrate_inodes_available) + + if (( num_inodes < num_files )); then + log "change the number of files $num_files to the" \ + "number of available inodes $num_inodes" + num_files=$num_inodes + fi + + if (( num_dirs > 1 )); then + num_entries=$((num_files / num_dirs)) + log "split $num_files files to $num_dirs" \ + "with $num_entries files each" + mdtest_options+=(-I=$num_entries) + fi + + generate_machine_file $CLIENTS $MACHINEFILE || + error "can not generate machinefile" + + install -v -d -m 0777 $mdtest_basedir + + setstripe_getstripe $mdtest_basedir ${stripe_options[@]} + + save_lustre_params $(get_facets MDS) \ + mdt.*.enable_remote_dir_gid > $params_file + + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param mdt.*.enable_remote_dir_gid=-1 + + stack_trap "restore_lustre_params < $params_file" EXIT + + for np in 1 $num_procs; do + num_entries=$((num_files / np )) + + mpi_run $MACHINEFILE_OPTION $MACHINEFILE \ + -np $np -npernode $num_cores $MDTEST \ + ${mdtest_options[@]} -n=$num_entries \ + ${mdtest_actions[@]} 2>&1 | tee -a "$LOG" + + rc=${PIPESTATUS[0]} + + if (( rc != 0 )); then + mpi_run $MACHINEFILE_OPTION $MACHINEFILE \ + -np $np -npernode $num_cores $MDTEST \ + ${mdtest_options[@]} -n=$num_entries \ + -r 2>&1 | tee -a "$LOG" + break + fi + done + + rmdir -v $mdtest_basedir + rm -v $state $MACHINEFILE + + return $rc } ######################## @@ -7385,7 +8371,7 @@ convert_facet2label() { local varsvc=${facet}_svc - if [ -n ${!varsvc} ]; then + if [ -n "${!varsvc}" ]; then echo ${!varsvc} else error "No label for $facet!" @@ -7396,20 +8382,6 @@ get_clientosc_proc_path() { echo "${1}-osc-[-0-9a-f]*" } -# If the 2.0 MDS was mounted on 1.8 device, then the OSC and LOV names -# used by MDT would not be changed. -# mdt lov: fsname-mdtlov -# mdt osc: fsname-OSTXXXX-osc -mds_on_old_device() { - local mds=${1:-"$SINGLEMDS"} - - if [ $(lustre_version_code $mds) -gt $(version_code 1.9.0) ]; then - do_facet $mds "lctl list_param osc.$FSNAME-OST*-osc \ - > /dev/null 2>&1" && return 0 - fi - return 1 -} - get_mdtosc_proc_path() { local mds_facet=$1 local ost_label=${2:-"*OST*"} @@ -7426,62 +8398,63 @@ get_mdtosc_proc_path() { } get_osc_import_name() { - local facet=$1 - local ost=$2 - local label=$(convert_facet2label $ost) + local facet=$1 + local ost=$2 + local label=$(convert_facet2label $ost) - if [ "${facet:0:3}" = "mds" ]; then - get_mdtosc_proc_path $facet $label - return 0 - fi + if [ "${facet:0:3}" = "mds" ]; then + get_mdtosc_proc_path $facet $label + return 0 + fi - get_clientosc_proc_path $label - return 0 + get_clientosc_proc_path $label + return 0 } _wait_import_state () { - local expected=$1 - local CONN_PROC=$2 - local maxtime=${3:-$(max_recovery_time)} - local error_on_failure=${4:-1} - local CONN_STATE - local i=0 + local expected="$1" + local CONN_PROC="$2" + local maxtime=${3:-$(max_recovery_time)} + local err_on_fail=${4:-1} + local CONN_STATE + local i=0 CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do - if [ "${expected}" == "DISCONN" ]; then - # for disconn we can check after proc entry is removed - [ "x${CONN_STATE}" == "x" ] && return 0 - # with AT enabled, we can have connect request timeout near of - # reconnect timeout and test can't see real disconnect - [ "${CONN_STATE}" == "CONNECTING" ] && return 0 - fi - if [ $i -ge $maxtime ]; then - [ $error_on_failure -ne 0 ] && \ - error "can't put import for $CONN_PROC into ${expected}" \ - "state after $i sec, have ${CONN_STATE}" - return 1 - fi - sleep 1 - # Add uniq for multi-mount case - CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - i=$(($i + 1)) - done + while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do + if [[ "${expected}" == "DISCONN" ]]; then + # for disconn we can check after proc entry is removed + [[ -z "${CONN_STATE}" ]] && return 0 + # with AT, we can have connect request timeout near + # reconnect timeout and test can't see real disconnect + [[ "${CONN_STATE}" == "CONNECTING" ]] && return 0 + fi + if (( $i >= $maxtime )); then + (( $err_on_fail != 0 )) && + error "can't put import for $CONN_PROC into ${expected} state after $i sec, have ${CONN_STATE}" + return 1 + fi + sleep 1 + # Add uniq for multi-mount case + CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | + cut -f2 | uniq) + i=$((i + 1)) + done - log "$CONN_PROC in ${CONN_STATE} state after $i sec" - return 0 + log "$CONN_PROC in ${CONN_STATE} state after $i sec" + return 0 } wait_import_state() { - local state=$1 - local params=$2 - local maxtime=${3:-$(max_recovery_time)} - local error_on_failure=${4:-1} - local param - - for param in ${params//,/ }; do - _wait_import_state $state $param $maxtime $error_on_failure || return - done + local expected="$1" + local params="$2" + local maxtime=${3:-$(max_recovery_time)} + local err_on_fail=${4:-1} + local param + + for param in ${params//,/ }; do + _wait_import_state "$expected" "$param" $maxtime $err_on_fail || + return + done } wait_import_state_mount() { @@ -7489,7 +8462,7 @@ wait_import_state_mount() { return 0 fi - wait_import_state $* + wait_import_state "$@" } # One client request could be timed out because server was not ready @@ -7521,15 +8494,15 @@ wait_import_state_mount() { # #define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20) request_timeout () { - local facet=$1 + local facet=$1 - # request->rq_timeout = INITIAL_CONNECT_TIMEOUT - local init_connect_timeout=$TIMEOUT - [[ $init_connect_timeout -ge 5 ]] || init_connect_timeout=5 + # request->rq_timeout = INITIAL_CONNECT_TIMEOUT + local init_connect_timeout=$TIMEOUT + [[ $init_connect_timeout -ge 5 ]] || init_connect_timeout=5 - local at_min=$(at_get $facet at_min) + local at_min=$(at_get $facet at_min) - echo $(( init_connect_timeout + at_min )) + echo $(( init_connect_timeout + at_min )) } _wait_osc_import_state() { @@ -7651,7 +8624,7 @@ wait_mgc_import_state() { $error_on_failure || return done else - _wait_mgc_import_state "$facet" "$expected" + _wait_mgc_import_state "$facet" "$expected" \ $error_on_failure || return fi } @@ -7675,6 +8648,10 @@ get_clientmgc_proc_path() { } do_rpc_nodes () { + local quiet + + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + local list=$1 shift @@ -7684,18 +8661,21 @@ do_rpc_nodes () { local LIBPATH="/usr/lib/lustre/tests:/usr/lib64/lustre/tests:" local TESTPATH="$RLUSTRE/tests:" local RPATH="PATH=${TESTPATH}${LIBPATH}${PATH}:/sbin:/bin:/usr/sbin:" - do_nodesv $list "${RPATH} NAME=${NAME} bash rpc.sh $@ " + do_nodes ${quiet:-"--verbose"} $list "${RPATH} NAME=${NAME} \ + TESTLOG_PREFIX=$TESTLOG_PREFIX TESTNAME=$TESTNAME \ + bash rpc.sh $* " } wait_clients_import_state () { - local list=$1 - local facet=$2 - local expected=$3 - - local facets=$facet + local list="$1" + local facet="$2" + local expected="$3" + local facets="$facet" if [ "$FAILURE_MODE" = HARD ]; then - facets=$(facets_on_host $(facet_active_host $facet)) + facets=$(for f in ${facet//,/ }; do + facets_on_host $(facet_active_host $f) | tr "," "\n" + done | sort -u | paste -sd , ) fi for facet in ${facets//,/ }; do @@ -7703,11 +8683,11 @@ wait_clients_import_state () { local proc_path case $facet in ost* ) proc_path="osc.$(get_clientosc_proc_path \ - $label).ost_server_uuid" ;; + $label).ost_server_uuid" ;; mds* ) proc_path="mdc.$(get_clientmdc_proc_path \ - $label).mds_server_uuid" ;; + $label).mds_server_uuid" ;; mgs* ) proc_path="mgc.$(get_clientmgc_proc_path \ - $label).mgs_server_uuid" ;; + $label).mgs_server_uuid" ;; *) error "unknown facet!" ;; esac @@ -7721,6 +8701,10 @@ wait_clients_import_state () { fi } +wait_clients_import_ready() { + wait_clients_import_state "$1" "$2" "\(FULL\|IDLE\)" +} + wait_osp_active() { local facet=$1 local tgt_name=$2 @@ -7742,7 +8726,7 @@ wait_osp_active() { fi echo "check $mproc" - while [ 1 ]; do + while true; do sleep 5 local result=$(do_facet mds${num} "$LCTL get_param -n $mproc") local max=30 @@ -7805,6 +8789,7 @@ check_pool_not_exist() { create_pool() { local fsname=${1%%.*} local poolname=${1##$fsname.} + local keep_pools=${2:-false} stack_trap "destroy_test_pools $fsname" EXIT do_facet mgs lctl pool_new $1 @@ -7823,7 +8808,7 @@ create_pool() { wait_update $HOSTNAME "lctl get_param -n lov.$fsname-*.pools.$poolname \ 2>/dev/null || echo foo" "" || error "pool_new failed $1" - add_pool_to_list $1 + $keep_pools || add_pool_to_list $1 return $RC } @@ -7841,10 +8826,18 @@ remove_pool_from_list () { local poolname=${1##$fsname.} local listvar=${fsname}_CREATED_POOLS - local temp=${listvar}=$(exclude_items_from_list ${!listvar} $poolname) + local temp=${listvar}=$(exclude_items_from_list "${!listvar}" $poolname) eval export $temp } +# cleanup all pools exist on $FSNAME +destroy_all_pools () { + local i + for i in $(list_pool $FSNAME); do + destroy_pool $i + done +} + destroy_pool_int() { local ost local OSTS=$(list_pool $1) @@ -7865,8 +8858,7 @@ destroy_pool() { local RC - check_pool_not_exist $fsname.$poolname - [[ $? -eq 0 ]] && return 0 + check_pool_not_exist $fsname.$poolname && return 0 || true destroy_pool_int $fsname.$poolname RC=$? @@ -7901,7 +8893,6 @@ destroy_pools () { } destroy_test_pools () { - trap 0 local fsname=${1:-$FSNAME} destroy_pools $fsname || true } @@ -7931,13 +8922,13 @@ gather_logs () { $LCTL dk > ${prefix}.debug_log.$(hostname -s).${suffix} dmesg > ${prefix}.dmesg.$(hostname -s).${suffix} [ "$SHARED_KEY" = true ] && find $SK_PATH -name '*.key' -exec \ - lgss_sk -r {} \; &> \ + $LGSS_SK -r {} \; &> \ ${prefix}.ssk_keys.$(hostname -s).${suffix} [ "$SHARED_KEY" = true ] && lctl get_param 'nodemap.*.*' > \ ${prefix}.nodemaps.$(hostname -s).${suffix} - [ "$GSS_SK" = true ] && keyctl show > \ + [ "$GSS" = true ] && keyctl show > \ ${prefix}.keyring.$(hostname -s).${suffix} - [ "$GSS_SK" = true ] && journalctl -a > \ + [ "$GSS" = true ] && journalctl -a > \ ${prefix}.journal.$(hostname -s).${suffix} return fi @@ -7947,12 +8938,12 @@ gather_logs () { dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}" if [ "$SHARED_KEY" = true ]; then do_nodesv $list "find $SK_PATH -name '*.key' -exec \ - lgss_sk -r {} \; &> \ + $LGSS_SK -r {} \; &> \ ${prefix}.ssk_keys.\\\$(hostname -s).${suffix}" do_facet mds1 "lctl get_param 'nodemap.*.*' > \ ${prefix}.nodemaps.\\\$(hostname -s).${suffix}" fi - if [ "$GSS_SK" = true ]; then + if [ "$GSS" = true ]; then do_nodesv $list "keyctl show > \ ${prefix}.keyring.\\\$(hostname -s).${suffix}" do_nodesv $list "journalctl -a > \ @@ -7969,26 +8960,26 @@ gather_logs () { } do_ls () { - local mntpt_root=$1 - local num_mntpts=$2 - local dir=$3 - local i - local cmd - local pids - local rc=0 - - for i in $(seq 0 $num_mntpts); do - cmd="ls -laf ${mntpt_root}$i/$dir" - echo + $cmd; - $cmd > /dev/null & - pids="$pids $!" - done - echo pids=$pids - for pid in $pids; do - wait $pid || rc=$? - done - - return $rc + local mntpt_root=$1 + local num_mntpts=$2 + local dir=$3 + local i + local cmd + local pids + local rc=0 + + for i in $(seq 0 $num_mntpts); do + cmd="ls -laf ${mntpt_root}$i/$dir" + echo + $cmd; + $cmd > /dev/null & + pids="$pids $!" + done + echo pids=$pids + for pid in $pids; do + wait $pid || rc=$? + done + + return $rc } # check_and_start_recovery_timer() @@ -8030,7 +9021,7 @@ recovery_time_min() { initial_connect_timeout=$connection_switch_min || initial_connect_timeout=$timeout_20 - reconnect_delay_max=$((connection_switch_max + connection_switch_inc + \ + reconnect_delay_max=$((connection_switch_max + connection_switch_inc + initial_connect_timeout)) echo $((2 * reconnect_delay_max)) } @@ -8050,22 +9041,22 @@ PROC_CON="srpc_contexts" combination() { - local M=$1 - local N=$2 - local R=1 - - if [ $M -lt $N ]; then - R=0 - else - N=$((N + 1)) - while [ $N -lt $M ]; do - R=$((R * N)) - N=$((N + 1)) - done - fi + local M=$1 + local N=$2 + local R=1 - echo $R - return 0 + if [ $M -lt $N ]; then + R=0 + else + N=$((N + 1)) + while [ $N -lt $M ]; do + R=$((R * N)) + N=$((N + 1)) + done + fi + + echo $R + return 0 } calc_connection_cnt() { @@ -8106,24 +9097,24 @@ calc_connection_cnt() { set_rule() { - local tgt=$1 - local net=$2 - local dir=$3 - local flavor=$4 - local cmd="$tgt.srpc.flavor" - - if [ $net == "any" ]; then - net="default" - fi - cmd="$cmd.$net" + local tgt=$1 + local net=$2 + local dir=$3 + local flavor=$4 + local cmd="$tgt.srpc.flavor" - if [ $dir != "any" ]; then - cmd="$cmd.$dir" - fi + if [ $net == "any" ]; then + net="default" + fi + cmd="$cmd.$net" + + if [ $dir != "any" ]; then + cmd="$cmd.$dir" + fi - cmd="$cmd=$flavor" - log "Setting sptlrpc rule: $cmd" - do_facet mgs "$LCTL conf_param $cmd" + cmd="$cmd=$flavor" + log "Setting sptlrpc rule: $cmd" + do_facet mgs "$LCTL conf_param $cmd" } count_contexts() @@ -8135,186 +9126,302 @@ count_contexts() count_flvr() { - local output=$1 - local flavor=$2 - local count=0 - - rpc_flvr=`echo $flavor | awk -F - '{ print $1 }'` - bulkspec=`echo $flavor | awk -F - '{ print $2 }'` - - count=`echo "$output" | grep "rpc flavor" | grep $rpc_flvr | wc -l` - - if [ "x$bulkspec" != "x" ]; then - algs=`echo $bulkspec | awk -F : '{ print $2 }'` - - if [ "x$algs" != "x" ]; then - bulk_count=`echo "$output" | grep "bulk flavor" | grep $algs | wc -l` - else - bulk=`echo $bulkspec | awk -F : '{ print $1 }'` - if [ $bulk == "bulkn" ]; then - bulk_count=`echo "$output" | grep "bulk flavor" \ - | grep "null/null" | wc -l` - elif [ $bulk == "bulki" ]; then - bulk_count=`echo "$output" | grep "bulk flavor" \ - | grep "/null" | grep -v "null/" | wc -l` - else - bulk_count=`echo "$output" | grep "bulk flavor" \ - | grep -v "/null" | grep -v "null/" | wc -l` - fi - fi - - [ $bulk_count -lt $count ] && count=$bulk_count - fi + local output=$1 + local flavor=$2 + local count=0 + + rpc_flvr=`echo $flavor | awk -F - '{ print $1 }'` + bulkspec=`echo $flavor | awk -F - '{ print $2 }'` + + count=`echo "$output" | grep "rpc flavor" | grep $rpc_flvr | wc -l` + + if [ "x$bulkspec" != "x" ]; then + algs=`echo $bulkspec | awk -F : '{ print $2 }'` + + if [ "x$algs" != "x" ]; then + bulk_count=`echo "$output" | grep "bulk flavor" | + grep $algs | wc -l` + else + bulk=`echo $bulkspec | awk -F : '{ print $1 }'` + + if [ $bulk == "bulkn" ]; then + bulk_count=`echo "$output" | + grep "bulk flavor" | grep "null/null" | + wc -l` + elif [ $bulk == "bulki" ]; then + bulk_count=`echo "$output" | + grep "bulk flavor" | grep "/null" | + grep -v "null/" | wc -l` + else + bulk_count=`echo "$output" | + grep "bulk flavor" | grep -v "/null" | + grep -v "null/" | wc -l` + fi + fi + [ $bulk_count -lt $count ] && count=$bulk_count + fi + + echo $count +} + +flvr_cnt_cli2mdt() +{ + local flavor=$1 + local cnt + + local clients=${CLIENTS:-$HOSTNAME} + + for c in ${clients//,/ }; do + local output=$(do_node $c lctl get_param -n \ + mdc.*-*-mdc-*.$PROC_CLI 2>/dev/null) + local tmpcnt=$(count_flvr "$output" $flavor) - echo $count + if $GSS_SK && [ $flavor != "null" ]; then + # tmpcnt=min(contexts,flavors) to ensure SK context is + # on + output=$(do_node $c lctl get_param -n \ + mdc.*-MDT*-mdc-*.$PROC_CON 2>/dev/null) + local outcon=$(count_contexts "$output") + + if [ "$outcon" -lt "$tmpcnt" ]; then + tmpcnt=$outcon + fi + fi + cnt=$((cnt + tmpcnt)) + done + echo $cnt +} + +flvr_dump_cli2mdt() +{ + local clients=${CLIENTS:-$HOSTNAME} + + for c in ${clients//,/ }; do + do_node $c lctl get_param \ + mdc.*-*-mdc-*.$PROC_CLI 2>/dev/null + + if $GSS_SK; then + do_node $c lctl get_param \ + mdc.*-MDT*-mdc-*.$PROC_CON 2>/dev/null + fi + done +} + +flvr_cnt_cli2ost() +{ + local flavor=$1 + local cnt + + local clients=${CLIENTS:-$HOSTNAME} + + for c in ${clients//,/ }; do + # reconnect if idle + do_node $c lctl set_param osc.*.idle_connect=1 >/dev/null 2>&1 + local output=$(do_node $c lctl get_param -n \ + osc.*OST*-osc-[^M][^D][^T]*.$PROC_CLI 2>/dev/null) + local tmpcnt=$(count_flvr "$output" $flavor) + + if $GSS_SK && [ $flavor != "null" ]; then + # tmpcnt=min(contexts,flavors) to ensure SK context is on + output=$(do_node $c lctl get_param -n \ + osc.*OST*-osc-[^M][^D][^T]*.$PROC_CON 2>/dev/null) + local outcon=$(count_contexts "$output") + + if [ "$outcon" -lt "$tmpcnt" ]; then + tmpcnt=$outcon + fi + fi + cnt=$((cnt + tmpcnt)) + done + echo $cnt +} + +flvr_dump_cli2ost() +{ + local clients=${CLIENTS:-$HOSTNAME} + + for c in ${clients//,/ }; do + do_node $c lctl get_param \ + osc.*OST*-osc-[^M][^D][^T]*.$PROC_CLI 2>/dev/null + + if $GSS_SK; then + do_node $c lctl get_param \ + osc.*OST*-osc-[^M][^D][^T]*.$PROC_CON 2>/dev/null + fi + done +} + +flvr_cnt_mdt2mdt() +{ + local flavor=$1 + local cnt=0 + + if [ $MDSCOUNT -le 1 ]; then + echo 0 + return + fi + + for num in `seq $MDSCOUNT`; do + local output=$(do_facet mds$num lctl get_param -n \ + osp.*-MDT*osp-MDT*.$PROC_CLI 2>/dev/null) + local tmpcnt=$(count_flvr "$output" $flavor) + + if $GSS_SK && [ $flavor != "null" ]; then + # tmpcnt=min(contexts,flavors) to ensure SK context is on + output=$(do_facet mds$num lctl get_param -n \ + osp.*-MDT*osp-MDT*.$PROC_CON 2>/dev/null) + local outcon=$(count_contexts "$output") + + if [ "$outcon" -lt "$tmpcnt" ]; then + tmpcnt=$outcon + fi + fi + cnt=$((cnt + tmpcnt)) + done + echo $cnt; +} + +flvr_dump_mdt2mdt() +{ + for num in `seq $MDSCOUNT`; do + do_facet mds$num lctl get_param \ + osp.*-MDT*osp-MDT*.$PROC_CLI 2>/dev/null + + if $GSS_SK; then + do_facet mds$num lctl get_param \ + osp.*-MDT*osp-MDT*.$PROC_CON 2>/dev/null + fi + done } -flvr_cnt_cli2mdt() +flvr_cnt_mdt2ost() { - local flavor=$1 - local cnt - - local clients=${CLIENTS:-$HOSTNAME} + local flavor=$1 + local cnt=0 + local mdtosc - for c in ${clients//,/ }; do - local output=$(do_node $c lctl get_param -n \ - mdc.*-*-mdc-*.$PROC_CLI 2>/dev/null) - local tmpcnt=$(count_flvr "$output" $flavor) - if $GSS_SK && [ $flavor != "null" ]; then - # tmpcnt=min(contexts,flavors) to ensure SK context is on - output=$(do_node $c lctl get_param -n \ - mdc.*-MDT*-mdc-*.$PROC_CON 2>/dev/null) - local outcon=$(count_contexts "$output") - if [ "$outcon" -lt "$tmpcnt" ]; then - tmpcnt=$outcon + for num in `seq $MDSCOUNT`; do + mdtosc=$(get_mdtosc_proc_path mds$num) + mdtosc=${mdtosc/-MDT*/-MDT\*} + local output=$(do_facet mds$num lctl get_param -n \ + os[cp].$mdtosc.$PROC_CLI 2>/dev/null) + # Ensure SK context is on + local tmpcnt=$(count_flvr "$output" $flavor) + + if $GSS_SK && [ $flavor != "null" ]; then + output=$(do_facet mds$num lctl get_param -n \ + os[cp].$mdtosc.$PROC_CON 2>/dev/null) + local outcon=$(count_contexts "$output") + + if [ "$outcon" -lt "$tmpcnt" ]; then + tmpcnt=$outcon + fi fi - fi - cnt=$((cnt + tmpcnt)) - done - echo $cnt + cnt=$((cnt + tmpcnt)) + done + echo $cnt; } -flvr_cnt_cli2ost() +flvr_dump_mdt2ost() { - local flavor=$1 - local cnt - - local clients=${CLIENTS:-$HOSTNAME} + for num in `seq $MDSCOUNT`; do + mdtosc=$(get_mdtosc_proc_path mds$num) + mdtosc=${mdtosc/-MDT*/-MDT\*} + do_facet mds$num lctl get_param \ + os[cp].$mdtosc.$PROC_CLI 2>/dev/null - for c in ${clients//,/ }; do - # reconnect if idle - do_node $c lctl set_param osc.*.idle_connect=1 >/dev/null 2>&1 - local output=$(do_node $c lctl get_param -n \ - osc.*OST*-osc-[^M][^D][^T]*.$PROC_CLI 2>/dev/null) - local tmpcnt=$(count_flvr "$output" $flavor) - if $GSS_SK && [ $flavor != "null" ]; then - # tmpcnt=min(contexts,flavors) to ensure SK context is on - output=$(do_node $c lctl get_param -n \ - osc.*OST*-osc-[^M][^D][^T]*.$PROC_CON 2>/dev/null) - local outcon=$(count_contexts "$output") - if [ "$outcon" -lt "$tmpcnt" ]; then - tmpcnt=$outcon + if $GSS_SK; then + do_facet mds$num lctl get_param \ + os[cp].$mdtosc.$PROC_CON 2>/dev/null fi - fi - cnt=$((cnt + tmpcnt)) - done - echo $cnt + done } -flvr_cnt_mdt2mdt() +flvr_cnt_mgc2mgs() { - local flavor=$1 - local cnt=0 - - if [ $MDSCOUNT -le 1 ]; then - echo 0 - return - fi + local flavor=$1 - for num in `seq $MDSCOUNT`; do - local output=$(do_facet mds$num lctl get_param -n \ - osp.*-MDT*osp-MDT*.$PROC_CLI 2>/dev/null) - local tmpcnt=$(count_flvr "$output" $flavor) - if $GSS_SK && [ $flavor != "null" ]; then - # tmpcnt=min(contexts,flavors) to ensure SK context is on - output=$(do_facet mds$num lctl get_param -n \ - osp.*-MDT*osp-MDT*.$PROC_CON 2>/dev/null) - local outcon=$(count_contexts "$output") - if [ "$outcon" -lt "$tmpcnt" ]; then - tmpcnt=$outcon - fi - fi - cnt=$((cnt + tmpcnt)) - done - echo $cnt; + local output=$(do_facet client lctl get_param -n mgc.*.$PROC_CLI \ + 2>/dev/null) + count_flvr "$output" $flavor } -flvr_cnt_mdt2ost() +do_check_flavor() { - local flavor=$1 - local cnt=0 - local mdtosc - - for num in `seq $MDSCOUNT`; do - mdtosc=$(get_mdtosc_proc_path mds$num) - mdtosc=${mdtosc/-MDT*/-MDT\*} - local output=$(do_facet mds$num lctl get_param -n \ - os[cp].$mdtosc.$PROC_CLI 2>/dev/null) - local tmpcnt=$(count_flvr "$output" $flavor) - if $GSS_SK && [ $flavor != "null" ]; then - # tmpcnt=min(contexts,flavors) to ensure SK context is on - output=$(do_facet mds$num lctl get_param -n \ - os[cp].$mdtosc.$PROC_CON 2>/dev/null) - local outcon=$(count_contexts "$output") - if [ "$outcon" -lt "$tmpcnt" ]; then - tmpcnt=$outcon - fi + local dir=$1 # from to + local flavor=$2 # flavor expected + local res=0 + + if [ $dir == "cli2mdt" ]; then + res=`flvr_cnt_cli2mdt $flavor` + elif [ $dir == "cli2ost" ]; then + res=`flvr_cnt_cli2ost $flavor` + elif [ $dir == "mdt2mdt" ]; then + res=`flvr_cnt_mdt2mdt $flavor` + elif [ $dir == "mdt2ost" ]; then + res=`flvr_cnt_mdt2ost $flavor` + elif [ $dir == "all2ost" ]; then + res1=`flvr_cnt_mdt2ost $flavor` + res2=`flvr_cnt_cli2ost $flavor` + res=$((res1 + res2)) + elif [ $dir == "all2mdt" ]; then + res1=`flvr_cnt_mdt2mdt $flavor` + res2=`flvr_cnt_cli2mdt $flavor` + res=$((res1 + res2)) + elif [ $dir == "all2all" ]; then + res1=`flvr_cnt_mdt2ost $flavor` + res2=`flvr_cnt_cli2ost $flavor` + res3=`flvr_cnt_mdt2mdt $flavor` + res4=`flvr_cnt_cli2mdt $flavor` + res=$((res1 + res2 + res3 + res4)) fi - cnt=$((cnt + tmpcnt)) - done - echo $cnt; + + echo $res } -flvr_cnt_mgc2mgs() +do_dump_imp_state() { - local flavor=$1 + local clients=${CLIENTS:-$HOSTNAME} + local type=$1 - local output=$(do_facet client lctl get_param -n mgc.*.$PROC_CLI \ - 2>/dev/null) - count_flvr "$output" $flavor + for c in ${clients//,/ }; do + [ "$type" == "osc" ] && + do_node $c lctl get_param osc.*.idle_timeout + do_node $c lctl get_param $type.*.import | + grep -E "name:|state:" + done } -do_check_flavor() +do_dump_flavor() { - local dir=$1 # from to - local flavor=$2 # flavor expected - local res=0 - - if [ $dir == "cli2mdt" ]; then - res=`flvr_cnt_cli2mdt $flavor` - elif [ $dir == "cli2ost" ]; then - res=`flvr_cnt_cli2ost $flavor` - elif [ $dir == "mdt2mdt" ]; then - res=`flvr_cnt_mdt2mdt $flavor` - elif [ $dir == "mdt2ost" ]; then - res=`flvr_cnt_mdt2ost $flavor` - elif [ $dir == "all2ost" ]; then - res1=`flvr_cnt_mdt2ost $flavor` - res2=`flvr_cnt_cli2ost $flavor` - res=$((res1 + res2)) - elif [ $dir == "all2mdt" ]; then - res1=`flvr_cnt_mdt2mdt $flavor` - res2=`flvr_cnt_cli2mdt $flavor` - res=$((res1 + res2)) - elif [ $dir == "all2all" ]; then - res1=`flvr_cnt_mdt2ost $flavor` - res2=`flvr_cnt_cli2ost $flavor` - res3=`flvr_cnt_mdt2mdt $flavor` - res4=`flvr_cnt_cli2mdt $flavor` - res=$((res1 + res2 + res3 + res4)) - fi + local dir=$1 # from to - echo $res + if [ $dir == "cli2mdt" ]; then + do_dump_imp_state mdc + flvr_dump_cli2mdt + elif [ $dir == "cli2ost" ]; then + do_dump_imp_state osc + flvr_dump_cli2ost + elif [ $dir == "mdt2mdt" ]; then + flvr_dump_mdt2mdt + elif [ $dir == "mdt2ost" ]; then + flvr_dump_mdt2ost + elif [ $dir == "all2ost" ]; then + flvr_dump_mdt2ost + do_dump_imp_state osc + flvr_dump_cli2ost + elif [ $dir == "all2mdt" ]; then + flvr_dump_mdt2mdt + do_dump_imp_state mdc + flvr_dump_cli2mdt + elif [ $dir == "all2all" ]; then + flvr_dump_mdt2ost + do_dump_imp_state osc + flvr_dump_cli2ost + flvr_dump_mdt2mdt + do_dump_imp_state mdc + flvr_dump_cli2mdt + fi } wait_flavor() @@ -8334,8 +9441,8 @@ wait_flavor() done echo "Error checking $flavor of $dir: expect $expect, actual $res" -# echo "Dumping additional logs for SK debug.." do_nodes $(comma_list $(all_server_nodes)) "keyctl show" + do_dump_flavor $dir if $dump; then gather_logs $(comma_list $(nodes_list)) fi @@ -8419,22 +9526,26 @@ set_flavor_all() fi echo "GSS_SK now at flavor: $flavor" else - set_rule $FSNAME any any $flavor - wait_flavor all2all $flavor + set_rule $FSNAME any cli2mdt $flavor + set_rule $FSNAME any cli2ost $flavor + set_rule $FSNAME any mdt2ost null + set_rule $FSNAME any mdt2mdt null + wait_flavor cli2mdt $flavor + wait_flavor cli2ost $flavor fi } check_logdir() { - local dir=$1 - # Checking for shared logdir - if [ ! -d $dir ]; then - # Not found. Create local logdir - mkdir -p $dir - else - touch $dir/check_file.$(hostname -s) - fi - return 0 + local dir=$1 + # Checking for shared logdir + if [ ! -d $dir ]; then + # Not found. Create local logdir + mkdir -p $dir + else + touch $dir/check_file.$(hostname -s) + fi + return 0 } check_write_access() { @@ -8480,160 +9591,73 @@ init_logging() { # log actual client and server versions if needed for debugging log "Client: $(lustre_build_version client)" + lustre_os_release echo client log "MDS: $(lustre_build_version mds1)" + lustre_os_release echo mds1 log "OSS: $(lustre_build_version ost1)" + lustre_os_release echo ost1 } log_test() { - yml_log_test $1 >> $YAML_LOG + yml_log_test $1 >> $YAML_LOG } log_test_status() { - yml_log_test_status $@ >> $YAML_LOG + yml_log_test_status "$@" >> $YAML_LOG } log_sub_test_begin() { - yml_log_sub_test_begin "$@" >> $YAML_LOG + yml_log_sub_test_begin "$@" >> $YAML_LOG } log_sub_test_end() { - yml_log_sub_test_end "$@" >> $YAML_LOG + yml_log_sub_test_end "$@" >> $YAML_LOG } run_llverdev() { - local dev=$1 - local llverdev_opts=$2 - local devname=$(basename $1) - local size=$(grep "$devname"$ /proc/partitions | awk '{print $3}') - # loop devices aren't in /proc/partitions - [ "x$size" == "x" ] && local size=$(ls -l $dev | awk '{print $5}') + local dev=$1; shift + local llverdev_opts="$*" + local devname=$(basename $dev) + local size=$(awk "/$devname$/ {print \$3}" /proc/partitions) + # loop devices aren't in /proc/partitions + [[ -z "$size" ]] && size=$(stat -c %s $dev) - size=$(($size / 1024 / 1024)) # Gb + local size_gb=$((size / 1024 / 1024)) # Gb - local partial_arg="" - # Run in partial (fast) mode if the size - # of a partition > 1 GB - [ $size -gt 1 ] && partial_arg="-p" + local partial_arg="" + # Run in partial (fast) mode if the size of a partition > 1 GB + (( $size == 0 || $size_gb > 1 )) && partial_arg="-p" - llverdev --force $partial_arg $llverdev_opts $dev + llverdev --force $partial_arg $llverdev_opts $dev } run_llverfs() { - local dir=$1 - local llverfs_opts=$2 - local use_partial_arg=$3 - local partial_arg="" - local size=$(df -B G $dir |tail -n 1 |awk '{print $2}' |sed 's/G//') #GB - - # Run in partial (fast) mode if the size - # of a partition > 1 GB - [ "x$use_partial_arg" != "xno" ] && [ $size -gt 1 ] && partial_arg="-p" - - llverfs $partial_arg $llverfs_opts $dir -} - -#Remove objects from OST -remove_ost_objects() { - local facet=$1 - local ostdev=$2 - local group=$3 - shift 3 - local objids="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$OST_MOUNT_OPTS - local i - local rc - - echo "removing objects from $ostdev on $facet: $objids" - if ! test -b $ostdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $ostdev $mntpt || - return $? - rc=0 - for i in $objids; do - rm $mntpt/O/$group/d$((i % 32))/$i || { rc=$?; break; } - done - umount -f $mntpt || return $? - return $rc -} - -#Remove files from MDT -remove_mdt_files() { - local facet=$1 - local mdtdev=$2 - shift 2 - local files="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$MDS_MOUNT_OPTS - - echo "removing files from $mdtdev on $facet: $files" - if [ $(facet_fstype $facet) == ldiskfs ] && - ! do_facet $facet test -b $mdtdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt || - return $? - rc=0 - for f in $files; do - rm $mntpt/ROOT/$f || { rc=$?; break; } - done - umount -f $mntpt || return $? - return $rc -} - -duplicate_mdt_files() { - local facet=$1 - local mdtdev=$2 - shift 2 - local files="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$MDS_MOUNT_OPTS + local dir=$1 + local llverfs_opts=$2 + local use_partial_arg=$3 + local partial_arg="" + local size=$(df -B G $dir |tail -n 1 |awk '{print $2}' |sed 's/G//') #GB - echo "duplicating files on $mdtdev on $facet: $files" - mkdir -p $mntpt || return $? - if [ $(facet_fstype $facet) == ldiskfs ] && - ! do_facet $facet test -b $mdtdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt || - return $? + # Run in partial (fast) mode if the size of a partition > 1 GB + [ "x$use_partial_arg" != "xno" ] && [ $size -gt 1 ] && partial_arg="-p" - do_umount() { - trap 0 - popd > /dev/null - rm $tmp - umount -f $mntpt - } - trap do_umount EXIT - - tmp=$(mktemp $TMP/setfattr.XXXXXXXXXX) - pushd $mntpt/ROOT > /dev/null || return $? - rc=0 - for f in $files; do - touch $f.bad || return $? - getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $tmp - rc=${PIPESTATUS[0]} - [ $rc -eq 0 ] || return $rc - setfattr --restore $tmp || return $? - done - do_umount + llverfs $partial_arg $llverfs_opts $dir } run_sgpdd () { - local devs=${1//,/ } - shift - local params=$@ - local rslt=$TMP/sgpdd_survey + local devs=${1//,/ } + shift + local params=$@ + local rslt=$TMP/sgpdd_survey - # sgpdd-survey cleanups ${rslt}.* files + # sgpdd-survey cleanups ${rslt}.* files - local cmd="rslt=$rslt $params scsidevs=\"$devs\" $SGPDDSURVEY" - echo + $cmd - eval $cmd - cat ${rslt}.detail + local cmd="rslt=$rslt $params scsidevs=\"$devs\" $SGPDDSURVEY" + echo + $cmd + eval $cmd + cat ${rslt}.detail } # returns the canonical name for an ldiskfs device @@ -8655,14 +9679,15 @@ ldiskfs_canon() { } is_sanity_benchmark() { - local benchmarks="dbench bonnie iozone fsx" - local suite=$1 - for b in $benchmarks; do - if [ "$b" == "$suite" ]; then - return 0 - fi - done - return 1 + local benchmarks="dbench bonnie iozone fsx" + local suite=$1 + + for b in $benchmarks; do + if [ "$b" == "$suite" ]; then + return 0 + fi + done + return 1 } min_ost_size () { @@ -8704,27 +9729,17 @@ get_block_count() { local device=$2 local count - [ -z "$CLIENTONLY" ] && count=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" | - awk '/^Block count:/ {print $3}') + [ -z "$CLIENTONLY" ] && + count=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" | + awk '/^Block count:/ {print $3}') echo -n ${count:-0} } -# Get the block size of the filesystem. -get_block_size() { - local facet=$1 - local device=$2 - local size - - [ -z "$CLIENTONLY" ] && size=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" | - awk '/^Block size:/ {print $3}') - echo -n ${size:-0} -} - # Check whether the "ea_inode" feature is enabled or not, to allow # ldiskfs xattrs over one block in size. Allow both the historical # Lustre feature name (large_xattr) and the upstream name (ea_inode). large_xattr_enabled() { - [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 1 + [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 0 local mds_dev=$(mdsdevname ${SINGLEMDS//mds/}) @@ -8740,17 +9755,17 @@ max_xattr_size() { # Dump the value of the named xattr from a file. get_xattr_value() { - local xattr_name=$1 - local file=$2 + local xattr_name=$1 + local file=$2 - echo "$(getfattr -n $xattr_name --absolute-names --only-values $file)" + echo "$(getfattr -n $xattr_name --absolute-names --only-values $file)" } # Generate a string with size of $size bytes. generate_string() { - local size=${1:-1024} # in bytes + local size=${1:-1024} # in bytes - echo "$(head -c $size < /dev/zero | tr '\0' y)" + echo "$(head -c $size < /dev/zero | tr '\0' y)" } reformat_external_journal() { @@ -8758,11 +9773,14 @@ reformat_external_journal() { local var var=${facet}_JRN + local varbs=${facet}_BLOCKSIZE if [ -n "${!var}" ]; then local rcmd="do_facet $facet" + local bs=${!varbs:-$BLCKSIZE} + bs="-b $bs" echo "reformat external journal on $facet:${!var}" - ${rcmd} mke2fs -O journal_dev ${!var} || return 1 + ${rcmd} mke2fs -O journal_dev $bs ${!var} || return 1 fi } @@ -8773,7 +9791,6 @@ mds_backup_restore() { local devname=$(mdsdevname $(facet_number $facet)) local mntpt=$(facet_mntpt brpt) local rcmd="do_facet $facet" - local metaea=${TMP}/backup_restore.ea local metadata=${TMP}/backup_restore.tgz local opts=${MDS_MOUNT_FS_OPTS} local svc=${facet}_svc @@ -8787,41 +9804,36 @@ mds_backup_restore() { # step 1: build mount point ${rcmd} mkdir -p $mntpt # step 2: cleanup old backup - ${rcmd} rm -f $metaea $metadata + ${rcmd} rm -f $metadata # step 3: mount dev - ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 1 + ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 3 if [ ! -z $igif ]; then # step 3.5: rm .lustre - ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 1 + ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 3 fi - # step 4: backup metaea - echo "backup EA" - ${rcmd} "cd $mntpt && getfattr -R -d -m '.*' -P . > $metaea && cd -" || - return 2 - # step 5: backup metadata + # step 4: backup metadata echo "backup data" - ${rcmd} tar zcf $metadata -C $mntpt/ . > /dev/null 2>&1 || return 3 - # step 6: umount - ${rcmd} $UMOUNT $mntpt || return 4 - # step 8: reformat dev + ${rcmd} tar zcf $metadata --xattrs --xattrs-include="trusted.*" \ + --sparse -C $mntpt/ . > /dev/null 2>&1 || return 4 + # step 5: umount + ${rcmd} $UMOUNT $mntpt || return 5 + # step 6: reformat dev echo "reformat new device" format_mdt $(facet_number $facet) - # step 9: mount dev + # step 7: mount dev ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 7 - # step 10: restore metadata + # step 8: restore metadata echo "restore data" - ${rcmd} tar zxfp $metadata -C $mntpt > /dev/null 2>&1 || return 8 - # step 11: restore metaea - echo "restore EA" - ${rcmd} "cd $mntpt && setfattr --restore=$metaea && cd - " || return 9 - # step 12: remove recovery logs + ${rcmd} tar zxfp $metadata --xattrs --xattrs-include="trusted.*" \ + --sparse -C $mntpt > /dev/null 2>&1 || return 8 + # step 9: remove recovery logs echo "remove recovery logs" ${rcmd} rm -fv $mntpt/OBJECTS/* $mntpt/CATALOGS - # step 13: umount dev + # step 10: umount dev ${rcmd} $UMOUNT $mntpt || return 10 - # step 14: cleanup tmp backup + # step 11: cleanup tmp backup ${rcmd} rm -f $metaea $metadata - # step 15: reset device label - it's not virgin on + # step 12: reset device label - it's not virgin on ${rcmd} e2label $devname ${!svc} } @@ -8879,10 +9891,16 @@ test_mkdir() { local dirstripe_count=${DIRSTRIPE_COUNT:-"2"} local dirstripe_index=${DIRSTRIPE_INDEX:-$((base % $MDSCOUNT))} local OPTIND=1 + local overstripe_count + local stripe_command="-c" - while getopts "c:H:i:p" opt; do + (( $MDS1_VERSION > $(version_code 2.15.0) )) && + hash_name+=("crush2") + + while getopts "c:C:H:i:p" opt; do case $opt in c) dirstripe_count=$OPTARG;; + C) overstripe_count=$OPTARG;; H) hash_type=$OPTARG;; i) dirstripe_index=$OPTARG;; p) p_option="-p";; @@ -8903,6 +9921,11 @@ test_mkdir() { fi fi + if [[ -n "$overstripe_count" ]]; then + stripe_command="-C" + dirstripe_count=$overstripe_count + fi + if [ $MDSCOUNT -le 1 ] || ! is_lustre ${parent}; then mkdir $path || error "mkdir '$path' failed" else @@ -8926,9 +9949,9 @@ test_mkdir() { dirstripe_count=1 fi - echo "striped dir -i$mdt_index -c$dirstripe_count -H $hash_type $path" - $LFS mkdir -i$mdt_index -c$dirstripe_count -H $hash_type $path || - error "mkdir -i $mdt_index -c$dirstripe_count -H $hash_type $path failed" + echo "striped dir -i$mdt_index $stripe_command$dirstripe_count -H $hash_type $path" + $LFS mkdir -i$mdt_index $stripe_command$dirstripe_count -H $hash_type $path || + error "mkdir -i $mdt_index $stripe_command$dirstripe_count -H $hash_type $path failed" fi } @@ -8955,7 +9978,7 @@ check_mount_and_prep() is_mounted $MOUNT || setupall rm -rf $DIR/[df][0-9]* || error "Fail to cleanup the env!" - mkdir $DIR/$tdir || error "Fail to mkdir $DIR/$tdir." + mkdir_on_mdt0 $DIR/$tdir || error "Fail to mkdir $DIR/$tdir." for idx in $(seq $MDSCOUNT); do local name="MDT$(printf '%04x' $((idx - 1)))" rm -rf $MOUNT/.lustre/lost+found/$name/* @@ -9016,6 +10039,7 @@ pool_add_targets() { if [ -z $last ]; then local list=$first + last=$first else local list=$(seq $first $step $last) fi @@ -9480,7 +10504,7 @@ parse_plain_param() echo "-c $val" elif [[ $line =~ ^"lmm_stripe_size:" ]]; then echo "-S $val" - elif [[ $line =~ ^"lmm_stripe_offset:" ]]; then + elif [[ $line =~ ^"lmm_stripe_offset:" && $SKIP_INDEX != yes ]]; then echo "-i $val" elif [[ $line =~ ^"lmm_pattern:" ]]; then echo "-L $val" @@ -9639,8 +10663,8 @@ changelog_register() { error "$mdt: changelog_mask=+hsm failed: $?" local cl_user - cl_user=$(do_facet $facet \ - $LCTL --device $mdt changelog_register -n) || + cl_user=$(do_facet $facet $LCTL --device $mdt \ + changelog_register -n "$@") || error "$mdt: register changelog user failed: $?" stack_trap "__changelog_deregister $facet $cl_user" EXIT @@ -9649,7 +10673,7 @@ changelog_register() { # cl_user is constrained enough to use whitespaces as separators CL_USERS[$facet]+="$cl_user " done - echo "Registered $MDSCOUNT changelog users: '${CL_USERS[@]% }'" + echo "Registered $MDSCOUNT changelog users: '${CL_USERS[*]% }'" } changelog_deregister() { @@ -9716,21 +10740,26 @@ __changelog_clear() $LFS changelog_clear $mdt $cl_user $rec } -# usage: changelog_clear [+]INDEX +# usage: changelog_clear [+]INDEX [facet]... # # If INDEX is prefixed with '+', increment every changelog user's record index # by INDEX. Otherwise, clear the changelog up to INDEX for every changelog # users. changelog_clear() { local rc + local idx=$1 + shift + local cl_facets="$@" # bash assoc arrays do not guarantee to list keys in created order # so reorder to get same order than in changelog_register() - local cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort | - tr "\n" " ") + [[ -n "$cl_facets" ]] || + cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort | + tr "\n" " ") + local cl_user for facet in $cl_facets; do for cl_user in ${CL_USERS[$facet]}; do - __changelog_clear $facet $cl_user $1 || rc=${rc:-$?} + __changelog_clear $facet $cl_user $idx || rc=${rc:-$?} done done @@ -9990,16 +11019,24 @@ verify_yaml_layout() { is_project_quota_supported() { $ENABLE_PROJECT_QUOTAS || return 1 + [[ -z "$SAVE_PROJECT_SUPPORTED" ]] || return $SAVE_PROJECT_SUPPORTED + local save_project_supported=1 [[ "$(facet_fstype $SINGLEMDS)" == "ldiskfs" && $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.9.55) ]] && - do_facet mds1 lfs --help |& grep -q project && return 0 + do_facet mds1 lfs --list-commands |& grep -q project && + save_project_supported=0 [[ "$(facet_fstype $SINGLEMDS)" == "zfs" && $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.10.53) ]] && - do_facet mds1 $ZPOOL get all | grep -q project_quota && return 0 + do_facet mds1 $ZPOOL get all | grep -q project_quota && + save_project_supported=0 - return 1 + # cache state of project quotas once instead of re-checking each time + export SAVE_PROJECT_SUPPORTED=$save_project_supported + echo "using SAVE_PROJECT_SUPPORTED=$SAVE_PROJECT_SUPPORTED" + + return $save_project_supported } # ZFS project quota enable/disable: @@ -10151,7 +11188,8 @@ pkill_copytools() { local hosts="$1" local signal="$2" - do_nodes "$hosts" "pkill --pidfile=$HSMTOOL_PID_FILE --signal=$signal hsmtool" + do_nodes "$hosts" \ + "pkill --pidfile=$HSMTOOL_PID_FILE --signal=$signal hsmtool" } copytool_continue() { @@ -10201,23 +11239,26 @@ copytool_logfile() __lhsmtool_rebind() { - do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --rebind "$@" "$mountpoint" + do_facet $facet $HSMTOOL \ + "${hsmtool_options[@]}" --rebind "$@" "$mountpoint" } __lhsmtool_import() { mkdir -p "$(dirname "$2")" || error "cannot create directory '$(dirname "$2")'" - do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --import "$@" "$mountpoint" + do_facet $facet $HSMTOOL \ + "${hsmtool_options[@]}" --import "$@" "$mountpoint" } __lhsmtool_setup() { local host="$(facet_host "$facet")" local cmd="$HSMTOOL ${hsmtool_options[@]} --daemon --pid-file=$HSMTOOL_PID_FILE" + [ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth" [ -n "$archive_id" ] && cmd+=" --archive $archive_id" -# [ ${#misc_options[@]} -gt 0 ] && + # [ ${#misc_options[@]} -gt 0 ] && # cmd+=" $(IFS=" " echo "$@")" cmd+=" $@ \"$mountpoint\"" @@ -10252,11 +11293,10 @@ copytool() # Use default values local facet=$SINGLEAGT local mountpoint="${MOUNT2:-$MOUNT}" - local hsm_root="${hsm_root:-$(hsm_root "$facet")}" # Parse arguments local fail_on_error=true - local -a hsmtool_options=("--hsm-root=$hsm_root") + local -a hsmtool_options=() local -a action_options=() if [[ -n "$HSMTOOL_ARCHIVE_FORMAT" ]]; then @@ -10283,7 +11323,7 @@ copytool() ;; -h|--hsm-root) shift - hsm_root="$1" + local hsm_root="$1" ;; -b|--bwlimit) shift @@ -10300,6 +11340,9 @@ copytool() shift done + local hsm_root="${hsm_root:-$(hsm_root "$facet")}" + hsmtool_options+=("--hsm-root=$hsm_root") + stack_trap "do_facet $facet rm -rf '$hsm_root'" EXIT do_facet $facet mkdir -p "$hsm_root" || error "mkdir '$hsm_root' failed" @@ -10380,7 +11423,7 @@ mdts_set_param() { local mdtno local rc=0 if [[ "$value" != "" ]]; then - value="=$value" + value="='$value'" fi for mdtno in $(seq 1 $MDSCOUNT); do local idx=$(($mdtno - 1)) @@ -10514,8 +11557,14 @@ rmultiop_stop() { } sleep_maxage() { - local delay=$(do_facet $SINGLEMDS lctl get_param -n lo[vd].*.qos_maxage | - awk '{ print $1 * 2; exit; }') + local delay=$(do_facet mds1 lctl get_param -n lod.*.qos_maxage | + awk '{ print $1 + 5; exit; }') + sleep $delay +} + +sleep_maxage_lmv() { + local delay=$(lctl get_param -n lmv.*.qos_maxage | + awk '{ print $1 + 5; exit; }') sleep $delay } @@ -10556,6 +11605,13 @@ statx_supported() { return $? } +# lfs rm_entry is disabled on native client +is_rmentry_supported() { + $LFS rm_entry $DIR/dir/not/exists > /dev/null + # is return code ENOENT? + (( $? == 2 )) +} + # # wrappers for createmany and unlinkmany # to set debug=0 if number of creates is high enough @@ -10563,51 +11619,91 @@ statx_supported() { # function createmany() { local count=${!#} + local rc - (( count > 100 )) && { - local saved_debug=$($LCTL get_param -n debug) - local list=$(comma_list $(all_nodes)) - - do_nodes $list $LCTL set_param debug=0 - } + if (( count > 100 )); then + debugsave + do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0 + fi $LUSTRE/tests/createmany $* - local rc=$? - (( count > 100 )) && - do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\"" + rc=$? + debugrestore > /dev/null + return $rc } function unlinkmany() { local count=${!#} + local rc - (( count > 100 )) && { - local saved_debug=$($LCTL get_param -n debug) - local list=$(comma_list $(all_nodes)) - - do_nodes $list $LCTL set_param debug=0 - } + if (( count > 100 )); then + debugsave + do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0 + fi $LUSTRE/tests/unlinkmany $* - local rc=$? - (( count > 100 )) && - do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\"" + rc=$? + debugrestore > /dev/null + return $rc } +# Check if fallocate on facet is working. Returns fallocate mode if enabled. +# Takes optional facet name as argument, to allow separate MDS/OSS checks. +function check_fallocate_supported() +{ + local facet=${1:-ost1} + local supported="FALLOCATE_SUPPORTED_$facet" + local fstype="${facet}_FSTYPE" + + if [[ -n "${!supported}" ]]; then + echo "${!supported}" + return 0 + fi + if [[ -z "${!fstype}" ]]; then + eval export $fstype=$(facet_fstype $facet) + fi + if [[ "${!fstype}" != "ldiskfs" ]]; then + echo "fallocate on ${!fstype} doesn't consume space" 1>&2 + return 1 + fi + + local fa_mode="osd-ldiskfs.$(facet_svc $facet).fallocate_zero_blocks" + local mode=$(do_facet $facet $LCTL get_param -n $fa_mode 2>/dev/null | + head -n 1) + + if [[ -z "$mode" ]]; then + echo "fallocate not supported on $facet" 1>&2 + return 1 + fi + eval export $supported="$mode" + + echo ${!supported} + return 0 +} + +# Check if fallocate supported on OSTs, enable if unset, skip if unavailable. +# Takes optional facet name as argument. +function check_fallocate_or_skip() +{ + local facet=$1 + + check_fallocate_supported $1 || skip "fallocate not supported" +} + # Check if fallocate supported on OSTs, enable if unset, default mode=0 # Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents) function check_set_fallocate() { local new_mode="$1" - local osts=$(comma_list $(osts_nodes)) local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks" - local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null| - head -n 1) + local old_mode="$(check_fallocate_supported)" [[ -n "$old_mode" ]] || { echo "fallocate not supported"; return 1; } [[ -z "$new_mode" && "$old_mode" != "-1" ]] && { echo "keep default fallocate mode: $old_mode"; return 0; } [[ "$new_mode" && "$old_mode" == "$new_mode" ]] && { echo "keep current fallocate mode: $old_mode"; return 0; } + local osts=$(comma_list $(osts_nodes)) stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode" do_nodes $osts $LCTL set_param $fa_mode=${new_mode:-0} || @@ -10617,7 +11713,273 @@ function check_set_fallocate() # Check if fallocate supported on OSTs, enable if unset, skip if unavailable function check_set_fallocate_or_skip() { - [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" - check_set_fallocate || skip "need at least 2.13.57 for fallocate" + check_set_fallocate || skip "need >= 2.13.57 and ldiskfs for fallocate" +} + +function disable_opencache() +{ + local state=$($LCTL get_param -n "llite.*.opencache_threshold_count" | + head -1) + + test -z "${saved_OPENCACHE_value}" && + export saved_OPENCACHE_value="$state" + + [[ "$state" = "off" ]] && return + + $LCTL set_param -n "llite.*.opencache_threshold_count"=off +} + +function set_opencache() +{ + local newvalue="$1" + local state=$($LCTL get_param -n "llite.*.opencache_threshold_count") + + [[ -n "$newvalue" ]] || return + + [[ -n "${saved_OPENCACHE_value}" ]] || + export saved_OPENCACHE_value="$state" + + $LCTL set_param -n "llite.*.opencache_threshold_count"=$newvalue +} + + + +function restore_opencache() +{ + [[ -z "${saved_OPENCACHE_value}" ]] || + $LCTL set_param -n "llite.*.opencache_threshold_count"=${saved_OPENCACHE_value} +} + +# LU-13417: XXX lots of tests assume the directory to be created under MDT0, +# created on MDT0, use this function to create directory on specific MDT +# explicitly, and set default LMV to create subdirs on the same MDT too. +mkdir_on_mdt() { + local mdt + local OPTIND=1 + + while getopts "i:" opt $*; do + case $opt in + i) mdt=$OPTARG;; + esac + done + + shift $((OPTIND - 1)) + + $LFS mkdir -i $mdt -c 1 $* +} + +mkdir_on_mdt0() { + mkdir_on_mdt -i0 $* +} + +# Wait for nodemap synchronization +wait_nm_sync() { + local nodemap_name=$1 + local key=$2 + local value=$3 + local opt=$4 + local proc_param + local is_active=$(do_facet mgs $LCTL get_param -n nodemap.active) + local max_retries=20 + local is_sync + local out1="" + local out2 + local mgs_ip=$(host_nids_address $mgs_HOST $NETTYPE | cut -d' ' -f1) + local i + + if [ "$nodemap_name" == "active" ]; then + proc_param="active" + elif [ -z "$key" ]; then + proc_param=${nodemap_name} + else + proc_param="${nodemap_name}.${key}" + fi + if [ "$opt" == "inactive" ]; then + # check nm sync even if nodemap is not activated + is_active=1 + opt="" + fi + (( is_active == 0 )) && [ "$proc_param" != "active" ] && return + + if [ -z "$value" ]; then + out1=$(do_facet mgs $LCTL get_param $opt \ + nodemap.${proc_param} 2>/dev/null) + echo "On MGS ${mgs_ip}, ${proc_param} = $out1" + else + out1=$value; + fi + + # if servers run on the same node, it is impossible to tell if they get + # synced with the mgs, so just wait an arbitrary 10 seconds + if [ $(facet_active_host mgs) == $(facet_active_host mds) ] && + [ $(facet_active_host mgs) == $(facet_active_host ost1) ]; then + echo "waiting 10 secs for sync" + sleep 10 + return + fi + + # wait up to 10 seconds for other servers to sync with mgs + for i in $(seq 1 10); do + for node in $(all_server_nodes); do + local node_ip=$(host_nids_address $node $NETTYPE | + cut -d' ' -f1) + + is_sync=true + if [ -z "$value" ]; then + [ $node_ip == $mgs_ip ] && continue + fi + + out2=$(do_node $node $LCTL get_param $opt \ + nodemap.$proc_param 2>/dev/null) + echo "On $node ${node_ip}, ${proc_param} = $out2" + [ "$out1" != "$out2" ] && is_sync=false && break + done + $is_sync && break + sleep 1 + done + if ! $is_sync; then + echo MGS + echo $out1 + echo OTHER - IP: $node_ip + echo $out2 + error "mgs and $nodemap_name ${key} mismatch, $i attempts" + fi + echo "waited $((i - 1)) seconds for sync" +} + +consume_precreations() { + local dir=$1 + local mfacet=$2 + local OSTIDX=$3 + local extra=${4:-2} + local OST=$(ostname_from_index $OSTIDX $dir) + + mkdir_on_mdt -i $(facet_index $mfacet) $dir/${OST} + $LFS setstripe -i $OSTIDX -c 1 ${dir}/${OST} + + # on the mdt's osc + local mdtosc_proc=$(get_mdtosc_proc_path $mfacet $OST) + local last_id=$(do_facet $mfacet $LCTL get_param -n \ + osp.$mdtosc_proc.prealloc_last_id) + local next_id=$(do_facet $mfacet $LCTL get_param -n \ + osp.$mdtosc_proc.prealloc_next_id) + echo "Creating to objid $last_id on ost $OST..." + createmany -o $dir/${OST}/f $next_id $((last_id - next_id + extra)) +} + +__exhaust_precreations() { + local OSTIDX=$1 + local FAILLOC=$2 + local FAILIDX=${3:-$OSTIDX} + local ofacet=ost$((OSTIDX + 1)) + + mkdir_on_mdt0 $DIR/$tdir + local mdtidx=$($LFS getstripe -m $DIR/$tdir) + local mfacet=mds$((mdtidx + 1)) + echo OSTIDX=$OSTIDX MDTIDX=$mdtidx + + local mdtosc_proc=$(get_mdtosc_proc_path $mfacet) + do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc* + +#define OBD_FAIL_OST_ENOSPC 0x215 + do_facet $ofacet $LCTL set_param fail_val=$FAILIDX fail_loc=0x215 + + consume_precreations $DIR/$tdir $mfacet $OSTIDX + + do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc* + do_facet $ofacet $LCTL set_param fail_loc=$FAILLOC +} + +exhaust_precreations() { + __exhaust_precreations $1 $2 $3 + sleep_maxage +} + +exhaust_all_precreations() { + local i + for (( i=0; i < OSTCOUNT; i++ )) ; do + __exhaust_precreations $i $1 -1 + done + sleep_maxage +} + +force_new_seq_ost() { + local dir=$1 + local mfacet=$2 + local OSTIDX=$3 + local OST=$(ostname_from_index $OSTIDX) + local mdtosc_proc=$(get_mdtosc_proc_path $mfacet $OST) + + do_facet $mfacet $LCTL set_param \ + osp.$mdtosc_proc.prealloc_force_new_seq=1 + # consume preallocated objects, to wake up precreate thread + consume_precreations $dir $mfacet $OSTIDX + do_facet $mfacet $LCTL set_param \ + osp.$mdtosc_proc.prealloc_force_new_seq=0 +} + +force_new_seq() { + local mfacet=$1 + local MDTIDX=$(facet_index $mfacet) + local MDT=$(mdtname_from_index $MDTIDX $DIR) + local i + + mkdir_on_mdt -i $MDTIDX $DIR/${MDT} + for (( i=0; i < OSTCOUNT; i++ )) ; do + force_new_seq_ost $DIR/${MDT} $mfacet $i & + done + wait + rm -rf $DIR/${MDT} +} + +force_new_seq_all() { + local i + + for (( i=0; i < MDSCOUNT; i++ )) ; do + force_new_seq mds$((i + 1)) & + done + wait + sleep_maxage +} + +ost_set_temp_seq_width_all() { + local osts=$(comma_list $(osts_nodes)) + local width=$(do_facet ost1 $LCTL get_param -n seq.*OST0000-super.width) + + do_nodes $osts $LCTL set_param seq.*OST*-super.width=$1 + stack_trap "do_nodes $osts $LCTL set_param seq.*OST*-super.width=$width" +} + +verify_yaml_available() { + python3 -c "import yaml; yaml.safe_load('''a: b''')" +} + +verify_yaml() { + python3 -c "import sys, yaml; obj = yaml.safe_load(sys.stdin)" +} + +verify_compare_yaml() { + python3 -c "import sys, yaml; f=open(\"$1\", \"r\"); obj1 = yaml.safe_load(f); f=open(\"$2\", \"r\"); obj2 = yaml.safe_load(f); sys.exit(obj1 != obj2)" } +zfs_or_rotational() { + local ost_idx=0 + local ost_name=$(ostname_from_index $ost_idx $MOUNT) + local param="get_param -n osd-*.${ost_name}.nonrotational" + local nonrotat=$(do_facet ost1 $LCTL $param) + + if [[ -z "$nonrotat" ]]; then + # At this point there is no point moving ahead. + # Will stop here and dump all the info + set -x + local ost_name=$(ostname_from_index $ost_idx) + set +x + error "$LCTL $input_str" + fi + + if [[ "$ost1_FSTYPE" == "zfs" ]] || (( "$nonrotat" == 0 )); then + return 0 + else + return 1 + fi +}