X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=17363b16b307e2b579740045860e49edb36ede05;hp=15668019d754cca34e377ccebd8638d40cd0c07e;hb=814bb394843434883a94fe6432cd8c656035a3e1;hpb=dcdf060342e7d69b64171840cf9475bf65d036ea diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 1566801..17363b1 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -295,8 +295,6 @@ init_test_env() { [ ! -f "$LCTL" ] && export LCTL=$(which lctl) export LFS=${LFS:-"$LUSTRE/utils/lfs"} [ ! -f "$LFS" ] && export LFS=$(which lfs) - SETSTRIPE=${SETSTRIPE:-"$LFS setstripe"} - GETSTRIPE=${GETSTRIPE:-"$LFS getstripe"} export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"} @@ -431,6 +429,11 @@ init_test_env() { . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} get_lustre_env + # use localrecov to enable recovery for local clients, LU-12722 + [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] || + export MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o localrecov"} + [[ $OST1_VERSION -lt $(version_code 2.13.52) ]] || + export OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o localrecov"} } check_cpt_number() { @@ -453,7 +456,7 @@ version_code() { # split arguments like "1.8.6-wc3" into "1", "8", "6", "wc3" eval set -- $(tr "[:punct:]" " " <<< $*) - echo -n "$((($1 << 16) | ($2 << 8) | $3))" + echo -n $(((${1:-0} << 16) | (${2:-0} << 8) | ${3:-0})) } export LINUX_VERSION=$(uname -r | sed -e "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/") @@ -490,14 +493,31 @@ export LINUX_VERSION_CODE=$(version_code ${LINUX_VERSION//\./ }) # output: prints version string to stdout in (up to 4) dotted-decimal values lustre_build_version() { local facet=${1:-client} - local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null || - $LCTL lustre_build_version 2>/dev/null || - $LCTL --version 2>/dev/null | cut -d' ' -f2") + local facet_version=${facet}_VERSION + + # if the global variable is already set, then use that + [ -n "${!facet_version}" ] && echo ${!facet_version} && return + + # this is the currently-running version of the kernel modules + local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null") + # we mostly test 2.10+ systems, only try others if the above fails + if [ -z "$ver" ]; then + ver=$(do_facet $facet "$LCTL lustre_build_version 2>/dev/null") + fi + if [ -z "$ver" ]; then + ver=$(do_facet $facet "$LCTL --version 2>/dev/null" | + cut -d' ' -f2) + fi local lver=$(egrep -i "lustre: |version: " <<<"$ver" | head -n 1) [ -n "$lver" ] && ver="$lver" - sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' -e 's/_/./g' <<<$ver | - cut -d. -f1-4 + lver=$(sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' <<<$ver | + tr _ . | cut -d. -f1-4) + + # save in global variable for the future + export $facet_version=$lver + + echo $lver } # Report the Lustre numeric build version code for the supplied facet. @@ -974,6 +994,22 @@ add_sk_mntflag() { echo -n $mt_opts } +from_build_tree() { + local from_tree + + case $LUSTRE in + /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ + /usr/lib64/lustre ) + from_tree=false + ;; + *) + from_tree=true + ;; + esac + + [ $from_tree = true ] +} + init_gss() { if $SHARED_KEY; then GSS=true @@ -984,16 +1020,6 @@ init_gss() { return fi - case $LUSTRE in - /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ - /usr/lib64/lustre ) - from_build_tree=false - ;; - *) - from_build_tree=true - ;; - esac - if ! module_loaded ptlrpc_gss; then load_module ptlrpc/gss/ptlrpc_gss module_loaded ptlrpc_gss || @@ -1012,7 +1038,7 @@ init_gss() { SK_NO_KEY=false local lgssc_conf_file="/etc/request-key.d/lgssc.conf" - if $from_build_tree; then + if from_build_tree; then mkdir -p $SK_OM_PATH if grep -q request-key /proc/mounts > /dev/null; then echo "SSK: Request key already mounted." @@ -1031,7 +1057,7 @@ init_gss() { cat $lgssc_conf_file if ! local_mode; then - if $from_build_tree; then + if from_build_tree; then do_nodes $(comma_list $(all_nodes)) "mkdir -p \ $SK_OM_PATH" do_nodes $(comma_list $(all_nodes)) "mount \ @@ -1111,11 +1137,19 @@ init_gss() { OST_MOUNT_OPTS=$(add_sk_mntflag $OST_MOUNT_OPTS) MOUNT_OPTS=$(add_sk_mntflag $MOUNT_OPTS) SEC=$SK_FLAVOR + if [ -z "$LGSS_KEYRING_DEBUG" ]; then + LGSS_KEYRING_DEBUG=4 + fi fi - if [ -n "$LGSS_KEYRING_DEBUG" ]; then + if [ -n "$LGSS_KEYRING_DEBUG" ] && \ + ( local_mode || from_build_tree ); then lctl set_param -n \ - sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG + sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG + elif [ -n "$LGSS_KEYRING_DEBUG" ]; then + do_nodes $(comma_list $(all_nodes)) "modprobe ptlrpc_gss && \ + lctl set_param -n \ + sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG" fi } @@ -1128,16 +1162,6 @@ cleanup_gss() { cleanup_sk() { if $GSS_SK; then - case $LUSTRE in - /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ - /usr/lib64/lustre ) - from_build_tree=false - ;; - *) - from_build_tree=true - ;; - esac - if $SK_S2S; then do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNM" do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNMCLI" @@ -1150,7 +1174,7 @@ cleanup_sk() { $SK_PATH/$FSNAME*.key $SK_PATH/nodemap/$FSNAME*.key" do_nodes $(comma_list $(all_nodes)) "keyctl show | \ awk '/lustre/ { print \\\$1 }' | xargs -IX keyctl unlink X" - if $from_build_tree; then + if from_build_tree; then # Remove the mount and clean up the files we added to # SK_PATH do_nodes $(comma_list $(all_nodes)) "while grep -q \ @@ -1552,34 +1576,42 @@ set_debug_size () { } set_default_debug () { - local debug=${1:-"$PTLDEBUG"} - local subsys=${2:-"$SUBSYSTEM"} - local debug_size=${3:-$DEBUG_SIZE} + local debug=${1:-"$PTLDEBUG"} + local subsys=${2:-"$SUBSYSTEM"} + local debug_size=${3:-$DEBUG_SIZE} - [ -n "$debug" ] && lctl set_param debug="$debug" >/dev/null - [ -n "$subsys" ] && lctl set_param subsystem_debug="${subsys# }" >/dev/null + [ -n "$debug" ] && lctl set_param debug="$debug" >/dev/null + [ -n "$subsys" ] && lctl set_param subsystem_debug="${subsys# }" >/dev/null - [ -n "$debug_size" ] && set_debug_size $debug_size > /dev/null + [ -n "$debug_size" ] && set_debug_size $debug_size > /dev/null } set_default_debug_nodes () { local nodes="$1" + local debug="${2:-"$PTLDEBUG"}" + local subsys="${3:-"$SUBSYSTEM"}" + local debug_size="${4:-$DEBUG_SIZE}" if [[ ,$nodes, = *,$HOSTNAME,* ]]; then nodes=$(exclude_items_from_list "$nodes" "$HOSTNAME") set_default_debug fi - do_rpc_nodes "$nodes" set_default_debug \ - \\\"$PTLDEBUG\\\" \\\"$SUBSYSTEM\\\" $DEBUG_SIZE || true + [[ -z "$nodes" ]] || + do_rpc_nodes "$nodes" set_default_debug \ + \\\"$debug\\\" \\\"$subsys\\\" $debug_size || true } set_default_debug_facet () { - local facet=$1 - local node=$(facet_active_host $facet) - [ -z "$node" ] && echo "No host defined for facet $facet" && exit 1 + local facet=$1 + local debug="${2:-"$PTLDEBUG"}" + local subsys="${3:-"$SUBSYSTEM"}" + local debug_size="${4:-$DEBUG_SIZE}" + local node=$(facet_active_host $facet) - set_default_debug_nodes $node + [ -n "$node" ] || error "No host defined for facet $facet" + + set_default_debug_nodes $node "$debug" "$subsys" $debug_size } set_hostid () { @@ -2325,6 +2357,21 @@ zconf_umount() { fi } +# Mount the file system on the MDS +mount_mds_client() { + local mds_HOST=${SINGLEMDS}_HOST + echo $mds_HOST + zconf_mount $mds1_HOST $MOUNT2 $MOUNT_OPTS || + error "unable to mount $MOUNT2 on MDS" +} + +# Unmount the file system on the MDS +umount_mds_client() { + local mds_HOST=${SINGLEMDS}_HOST + zconf_umount $mds1_HOST $MOUNT2 + do_facet $SINGLEMDS "rmdir $MOUNT2" +} + # nodes is comma list sanity_mount_check_nodes () { local nodes=$1 @@ -2729,16 +2776,15 @@ start_client_load() { } start_client_loads () { - local -a clients=(${1//,/ }) - local numloads=${#CLIENT_LOADS[@]} - local testnum + local -a clients=(${1//,/ }) + local numloads=${#CLIENT_LOADS[@]} - for ((nodenum=0; nodenum < ${#clients[@]}; nodenum++ )); do - testnum=$((nodenum % numloads)) - start_client_load ${clients[nodenum]} ${CLIENT_LOADS[testnum]} - done - # bug 22169: wait the background threads to start - sleep 2 + for ((nodenum=0; nodenum < ${#clients[@]}; nodenum++ )); do + local load=$((nodenum % numloads)) + start_client_load ${clients[nodenum]} ${CLIENT_LOADS[load]} + done + # bug 22169: wait the background threads to start + sleep 2 } # only for remote client @@ -3192,13 +3238,13 @@ wait_delete_completed_mds() { } wait_for_host() { - local hostlist=$1 + local hostlist=$1 - # we can use "for" here because we are waiting the slowest - for host in ${hostlist//,/ }; do - check_network "$host" 900 - done - while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done + # we can use "for" here because we are waiting the slowest + for host in ${hostlist//,/ }; do + check_network "$host" 900 + done + while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done } wait_for_facet() { @@ -5187,6 +5233,10 @@ init_param_vars () { # $LFS quotaoff -ug $MOUNT > /dev/null 2>&1 fi fi + + (( MDS1_VERSION <= $(version_code 2.13.52) )) || + do_nodes $(comma_list $(mdts_nodes)) \ + "$LCTL set_param lod.*.mdt_hash=crush" return 0 } @@ -5267,11 +5317,11 @@ check_timeout () { } is_mounted () { - local mntpt=$1 - [ -z $mntpt ] && return 1 - local mounted=$(mounted_lustre_filesystems) + local mntpt=$1 + [ -z $mntpt ] && return 1 + local mounted=$(mounted_lustre_filesystems) - echo $mounted' ' | grep -w -q $mntpt' ' + echo $mounted' ' | grep -w -q $mntpt' ' } is_empty_dir() { @@ -5370,6 +5420,9 @@ check_and_setup_lustre() { fi fi + if [ -n "$fs_STRIPEPARAMS" ]; then + setstripe_getstripe $MOUNT $fs_STRIPEPARAMS + fi if $GSS_SK; then set_flavor_all null elif $GSS; then @@ -5653,13 +5706,10 @@ check_network() { [ "$host" = "$HOSTNAME" ] && return 0 - echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..." - if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then - echo "Network not available!" + if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep; then + echo "$(date +'%H:%M:%S (%s)') waited for $host network ${max}s" exit 1 fi - - echo "$(date +'%H:%M:%S (%s)') network interface is UP" } no_dsh() { @@ -6001,19 +6051,19 @@ debug_size_restore() { } start_full_debug_logging() { - debugsave - debug_size_save + debugsave + debug_size_save - local FULLDEBUG=-1 - local DEBUG_SIZE=150 + local fulldebug=-1 + local debug_size=150 + local nodes=$(comma_list $(nodes_list)) - do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug_mb=$DEBUG_SIZE" - do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug=$FULLDEBUG;" + do_nodes $nodes "$LCTL set_param debug=$fulldebug debug_mb=$debug_size" } stop_full_debug_logging() { - debug_size_restore - debugrestore + debug_size_restore + debugrestore } # prints bash call stack @@ -6170,6 +6220,7 @@ skip_noexit() { [[ -n "$TESTSUITELOG" ]] && echo "$TESTSUITE: SKIP: $TESTNAME $@" >> $TESTSUITELOG || true + unset TESTNAME } skip() { @@ -6190,22 +6241,22 @@ build_test_filter() { fi done - [ "$EXCEPT$ALWAYS_EXCEPT" ] && \ - log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`" - [ "$EXCEPT_SLOW" ] && \ - log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`" - for E in $EXCEPT; do - eval EXCEPT_${E}=true - done - for E in $ALWAYS_EXCEPT; do - eval EXCEPT_ALWAYS_${E}=true - done - for E in $EXCEPT_SLOW; do - eval EXCEPT_SLOW_${E}=true - done - for G in $GRANT_CHECK_LIST; do - eval GCHECK_ONLY_${G}=true - done + [ "$EXCEPT$ALWAYS_EXCEPT" ] && + log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`" + [ "$EXCEPT_SLOW" ] && + log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`" + for E in $EXCEPT; do + eval EXCEPT_${E}=true + done + for E in $ALWAYS_EXCEPT; do + eval EXCEPT_ALWAYS_${E}=true + done + for E in $EXCEPT_SLOW; do + eval EXCEPT_SLOW_${E}=true + done + for G in $GRANT_CHECK_LIST; do + eval GCHECK_ONLY_${G}=true + done } basetest() { @@ -6220,59 +6271,63 @@ basetest() { export LAST_SKIPPED= export ALWAYS_SKIPPED= # -# Main entry into test-framework. This is called with the name and -# description of a test. The name is used to find the function to run +# Main entry into test-framework. This is called with the number and +# description of a test. The number is used to find the function to run # the test using "test_$name". # # This supports a variety of methods of specifying specific test to -# run or not run. These need to be documented... +# run or not run: +# - ONLY= env variable with space-separated list of test numbers to run +# - EXCEPT= env variable with space-separated list of test numbers to exclude # run_test() { assert_DIR - export base=$(basetest $1) - TESTNAME=test_$1 + local testnum=$1 + local testmsg=$2 + export base=$(basetest $testnum) + export TESTNAME=test_$testnum LAST_SKIPPED= ALWAYS_SKIPPED= # Check the EXCEPT, ALWAYS_EXCEPT and SLOW lists to see if we # need to skip the current test. If so, set the ALWAYS_SKIPPED flag. - local testname=EXCEPT_$1 - local testname_base=EXCEPT_$base - if [ ${!testname}x != x ]; then + local isexcept=EXCEPT_$testnum + local isexcept_base=EXCEPT_$base + if [ ${!isexcept}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping excluded test $1" - elif [ ${!testname_base}x != x ]; then + skip_message="skipping excluded test $testnum" + elif [ ${!isexcept_base}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping excluded test $1 (base $base)" + skip_message="skipping excluded test $testnum (base $base)" fi - testname=EXCEPT_ALWAYS_$1 - testname_base=EXCEPT_ALWAYS_$base - if [ ${!testname}x != x ]; then + isexcept=EXCEPT_ALWAYS_$testnum + isexcept_base=EXCEPT_ALWAYS_$base + if [ ${!isexcept}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping ALWAYS excluded test $1" - elif [ ${!testname_base}x != x ]; then + skip_message="skipping ALWAYS excluded test $testnum" + elif [ ${!isexcept_base}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping ALWAYS excluded test $1 (base $base)" + skip_message="skipping ALWAYS excluded test $testnum (base $base)" fi - testname=EXCEPT_SLOW_$1 - testname_base=EXCEPT_SLOW_$base - if [ ${!testname}x != x ]; then + isexcept=EXCEPT_SLOW_$testnum + isexcept_base=EXCEPT_SLOW_$base + if [ ${!isexcept}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping SLOW test $1" - elif [ ${!testname_base}x != x ]; then + skip_message="skipping SLOW test $testnum" + elif [ ${!isexcept_base}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping SLOW test $1 (base $base)" + skip_message="skipping SLOW test $testnum (base $base)" fi # If there are tests on the ONLY list, check if the current test # is on that list and, if so, check if the test is to be skipped # and if we are supposed to honor the skip lists. if [ -n "$ONLY" ]; then - testname=ONLY_$1 - testname_base=ONLY_$base - if [[ ${!testname}x != x || ${!testname_base}x != x ]]; then + local isonly=ONLY_$testnum + local isonly_base=ONLY_$base + if [[ ${!isonly}x != x || ${!isonly_base}x != x ]]; then if [[ -n "$ALWAYS_SKIPPED" && -n "$HONOR_EXCEPT" ]]; then LAST_SKIPPED="y" @@ -6282,7 +6337,7 @@ run_test() { [ -n "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED= ALWAYS_SKIPPED= - run_one_logged $1 "$2" + run_one_logged $testnum "$testmsg" return $? fi @@ -6297,10 +6352,9 @@ run_test() { skip_noexit "$skip_message" return 0 else - run_one_logged $1 "$2" + run_one_logged $testnum "$testmsg" return $? fi - } log() { @@ -6400,10 +6454,7 @@ group descriptors corrupted" # run_one() { local testnum=$1 - local message=$2 - export tfile=f${testnum}.${TESTSUITE} - export tdir=d${testnum}.${TESTSUITE} - export TESTNAME=test_$testnum + local testmsg="$2" local SAVE_UMASK=`umask` umask 0022 @@ -6411,7 +6462,7 @@ run_one() { $SETUP fi - banner "test $testnum: $message" + banner "test $testnum: $testmsg" test_${testnum} || error "test_$testnum failed with $?" cd $SAVE_PWD reset_fail_loc @@ -6419,12 +6470,9 @@ run_one() { check_node_health check_dmesg_for_errors || error "Error in dmesg detected" if [ "$PARALLEL" != "yes" ]; then - ps auxww | grep -v grep | grep -q multiop && + ps auxww | grep -v grep | grep -q "multiop " && error "multiop still running" fi - unset TESTNAME - unset tdir - unset tfile umask $SAVE_UMASK $CLEANUP return 0 @@ -6437,49 +6485,74 @@ run_one() { # - test result is saved to data file # run_one_logged() { - local BEFORE=$(date +%s) - local TEST_ERROR - local name=${TESTSUITE}.test_${1}.test_log.$(hostname -s).log + local before=$SECONDS + local testnum=$1 + local testmsg=$2 + export tfile=f${testnum}.${TESTSUITE} + export tdir=d${testnum}.${TESTSUITE} + local name=$TESTSUITE.$TESTNAME.test_log.$(hostname -s).log local test_log=$LOGDIR/$name - local zfs_log_name=${TESTSUITE}.test_${1}.zfs_log + local zfs_log_name=$TESTSUITE.$TESTNAME.zfs_log local zfs_debug_log=$LOGDIR/$zfs_log_name - rm -rf $LOGDIR/err - rm -rf $LOGDIR/ignore - rm -rf $LOGDIR/skip local SAVE_UMASK=$(umask) + local rc=0 umask 0022 + rm -f $LOGDIR/err $LOGDIR/ignore $LOGDIR/skip echo - log_sub_test_begin test_${1} - (run_one $1 "$2") 2>&1 | tee -i $test_log - local RC=${PIPESTATUS[0]} - - [ $RC -ne 0 ] && [ ! -f $LOGDIR/err ] && - echo "test_$1 returned $RC" | tee $LOGDIR/err - - duration=$(($(date +%s) - $BEFORE)) - pass "$1" "(${duration}s)" + # if ${ONLY_$testnum} set, repeat $ONLY_REPEAT times, otherwise once + local isonly=ONLY_$testnum + local repeat=${!isonly:+$ONLY_REPEAT} + + for testiter in $(seq ${repeat:-1}); do + local before_sub=$SECONDS + log_sub_test_begin $TESTNAME + + # remove temp files between repetitions to avoid test failures + [ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] && + rm -rf $DIR/$tdir* $DIR/$tfile* + # loop around subshell so stack_trap EXIT triggers each time + (run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log + rc=${PIPESTATUS[0]} + local append=-a + local duration_sub=$((SECONDS - before_sub)) + local test_error + + [[ $rc != 0 && ! -f $LOGDIR/err ]] && + echo "$TESTNAME returned $rc" | tee $LOGDIR/err + + if [[ -f $LOGDIR/err ]]; then + test_error=$(cat $LOGDIR/err) + TEST_STATUS="FAIL" + elif [[ -f $LOGDIR/ignore ]]; then + test_error=$(cat $LOGDIR/ignore) + elif [[ -f $LOGDIR/skip ]]; then + test_error=$(cat $LOGDIR/skip) + TEST_STATUS="SKIP" + else + TEST_STATUS="PASS" + fi - if [[ -f $LOGDIR/err ]]; then - TEST_ERROR=$(cat $LOGDIR/err) - elif [[ -f $LOGDIR/ignore ]]; then - TEST_ERROR=$(cat $LOGDIR/ignore) - elif [[ -f $LOGDIR/skip ]]; then - TEST_ERROR=$(cat $LOGDIR/skip) - fi - log_sub_test_end $TEST_STATUS $duration "$RC" "$TEST_ERROR" + pass "$testnum" "(${duration_sub}s)" + log_sub_test_end $TEST_STATUS $duration_sub "$rc" "$test_error" + [[ $rc != 0 ]] && break + done - if [[ "$TEST_STATUS" != "SKIP" ]] && [[ -f $TF_SKIP ]]; then + if [[ "$TEST_STATUS" != "SKIP" && -f $TF_SKIP ]]; then rm -f $TF_SKIP fi if [ -f $LOGDIR/err ]; then log_zfs_info "$zfs_debug_log" - $FAIL_ON_ERROR && exit $RC + $FAIL_ON_ERROR && exit $rc fi umask $SAVE_UMASK + unset TESTNAME + unset tdir + unset tfile + return 0 } @@ -6496,17 +6569,39 @@ canonical_path() { (cd $(dirname $1); echo $PWD/$(basename $1)) } +grant_from_clients() { + local nodes="$1" + + # get client grant + do_nodes $nodes "$LCTL get_param -n osc.${FSNAME}-*.cur_*grant_bytes" | + calc_sum +} + +grant_from_servers() { + local nodes="$1" + + # get server grant + # which is tot_granted less grant_precreate + do_nodes $nodes "$LCTL get_param obdfilter.${FSNAME}-OST*.tot_granted" \ + " obdfilter.${FSNAME}-OST*.tot_pending" \ + " obdfilter.${FSNAME}-OST*.grant_precreate" | + tr '=' ' ' | awk '/tot_granted/{ total += $2 }; + /tot_pending/{ total -= $2 }; + /grant_precreate/{ total -= $2 }; + END { printf("%0.0f", total) }' +} check_grant() { export base=$(basetest $1) [ "$CHECK_GRANT" == "no" ] && return 0 - testnamebase=GCHECK_ONLY_${base} - testname=GCHECK_ONLY_$1 - [ ${!testnamebase}x == x -a ${!testname}x == x ] && return 0 + local isonly_base=GCHECK_ONLY_${base} + local isonly=GCHECK_ONLY_$1 + [ ${!isonly_base}x == x -a ${!isonly}x == x ] && return 0 echo -n "checking grant......" + local osts=$(comma_list $(osts_nodes)) local clients=$CLIENTS [ -z "$clients" ] && clients=$(hostname) @@ -6515,29 +6610,28 @@ check_grant() { clients_up # initiate all idling connections # get client grant - client_grant=$(do_nodes $clients \ - "$LCTL get_param -n osc.${FSNAME}-*.cur_*grant_bytes" | - awk '{ total += $1 } END { printf("%0.0f", total) }') + cli_grant=$(grant_from_clients $clients) # get server grant # which is tot_granted less grant_precreate - server_grant=$(do_nodes $(comma_list $(osts_nodes)) \ - "$LCTL get_param "\ - "obdfilter.${FSNAME}-OST*.{tot_granted,tot_pending,grant_precreate}" | - sed 's/=/ /'| awk '/tot_granted/{ total += $2 }; - /tot_pending/{ total -= $2 }; - /grant_precreate/{ total -= $2 }; - END { printf("%0.0f", total) }') + srv_grant=$(grant_from_servers $osts) + count=0 # check whether client grant == server grant - if [[ $client_grant -ne $server_grant ]]; then + while [[ $cli_grant != $srv_grant && count++ -lt 30 ]]; do + echo "wait for client:$cli_grant == server:$srv_grant" + sleep 1 + cli_grant=$(grant_from_clients $clients) + srv_grant=$(grant_from_servers $osts) + done + if [[ $cli_grant -ne $srv_grant ]]; then do_nodes $(comma_list $(osts_nodes)) \ "$LCTL get_param obdfilter.${FSNAME}-OST*.tot*" \ - "obdfilter.${FSNAME}-OST*.grant_*" + "obdfilter.${FSNAME}-OST*.grant_*" do_nodes $clients "$LCTL get_param osc.${FSNAME}-*.cur_*_bytes" - error "failed: client:${client_grant} server: ${server_grant}." + error "failed grant check: client:$cli_grant server:$srv_grant" else - echo "pass: client:${client_grant} server: ${server_grant}" + echo "pass grant check: client:$cli_grant server:$srv_grant" fi } @@ -6564,6 +6658,16 @@ ostname_from_index() { echo ${uuid/_UUID/} } +mdtname_from_index() { + local uuid=$(mdtuuid_from_index $1) + echo ${uuid/_UUID/} +} + +mdssize_from_index () { + local mdt=$(mdtname_from_index $2) + $LFS df $1 | grep $mdt | awk '{ print $2 }' +} + index_from_ostuuid() { $LFS osts $2 | sed -ne "/${1}/s/\(.*\): .* .*$/\1/p" @@ -7129,13 +7233,9 @@ check_node_health() { for node in ${nodes//,/ }; do check_network "$node" 5 if [ $? -eq 0 ]; then - do_node $node "rc=0; - val=\\\$($LCTL get_param -n catastrophe 2>&1); - if [[ \\\$? -eq 0 && \\\$val -ne 0 ]]; then - echo \\\$(hostname -s): \\\$val; - rc=\\\$val; - fi; - exit \\\$rc" || error "$node:LBUG/LASSERT detected" + do_node $node "$LCTL get_param catastrophe 2>&1" | + grep -q "catastrophe=1" && + error "$node:LBUG/LASSERT detected" || true fi done } @@ -7686,39 +7786,62 @@ destroy_test_pools () { } gather_logs () { - local list=$1 + local list=$1 - local ts=$(date +%s) - local docp=true + local ts=$(date +%s) + local docp=true - if [[ ! -f "$YAML_LOG" ]]; then - # init_logging is not performed before gather_logs, - # so the $LOGDIR needs to be checked here - check_shared_dir $LOGDIR && touch $LOGDIR/shared - fi + if [[ ! -f "$YAML_LOG" ]]; then + # init_logging is not performed before gather_logs, + # so the $LOGDIR needs to be checked here + check_shared_dir $LOGDIR && touch $LOGDIR/shared + fi - [ -f $LOGDIR/shared ] && docp=false + [ -f $LOGDIR/shared ] && docp=false - # dump lustre logs, dmesg + # dump lustre logs, dmesg, and journal if GSS_SK=true - prefix="$TESTLOG_PREFIX.$TESTNAME" - suffix="$ts.log" - echo "Dumping lctl log to ${prefix}.*.${suffix}" + prefix="$TESTLOG_PREFIX.$TESTNAME" + suffix="$ts.log" + echo "Dumping lctl log to ${prefix}.*.${suffix}" - if [ -n "$CLIENTONLY" -o "$PDSH" == "no_dsh" ]; then - echo "Dumping logs only on local client." - $LCTL dk > ${prefix}.debug_log.$(hostname -s).${suffix} - dmesg > ${prefix}.dmesg.$(hostname -s).${suffix} - return - fi + if [ -n "$CLIENTONLY" -o "$PDSH" == "no_dsh" ]; then + echo "Dumping logs only on local client." + $LCTL dk > ${prefix}.debug_log.$(hostname -s).${suffix} + dmesg > ${prefix}.dmesg.$(hostname -s).${suffix} + [ "$SHARED_KEY" = true ] && find $SK_PATH -name '*.key' -exec \ + lgss_sk -r {} \; &> \ + ${prefix}.ssk_keys.$(hostname -s).${suffix} + [ "$SHARED_KEY" = true ] && lctl get_param 'nodemap.*.*' > \ + ${prefix}.nodemaps.$(hostname -s).${suffix} + [ "$GSS_SK" = true ] && keyctl show > \ + ${prefix}.keyring.$(hostname -s).${suffix} + [ "$GSS_SK" = true ] && journalctl -a > \ + ${prefix}.journal.$(hostname -s).${suffix} + return + fi - do_nodesv $list \ - "$LCTL dk > ${prefix}.debug_log.\\\$(hostname -s).${suffix}; - dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}" + do_nodesv $list \ + "$LCTL dk > ${prefix}.debug_log.\\\$(hostname -s).${suffix}; + dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}" + if [ "$SHARED_KEY" = true ]; then + do_nodesv $list "find $SK_PATH -name '*.key' -exec \ + lgss_sk -r {} \; &> \ + ${prefix}.ssk_keys.\\\$(hostname -s).${suffix}" + do_facet mds1 "lctl get_param 'nodemap.*.*' > \ + ${prefix}.nodemaps.\\\$(hostname -s).${suffix}" + fi + if [ "$GSS_SK" = true ]; then + do_nodesv $list "keyctl show > \ + ${prefix}.keyring.\\\$(hostname -s).${suffix}" + do_nodesv $list "journalctl -a > \ + ${prefix}.journal.\\\$(hostname -s).${suffix}" + fi - if [ ! -f $LOGDIR/shared ]; then - do_nodes $list rsync -az "${prefix}.*.${suffix}" $HOSTNAME:$LOGDIR - fi + if [ ! -f $LOGDIR/shared ]; then + do_nodes $list rsync -az "${prefix}.*.${suffix}" \ + $HOSTNAME:$LOGDIR + fi } do_ls () { @@ -8231,13 +8354,10 @@ init_logging() { umask $save_umask - # If modules are not yet loaded then older "lctl lustre_build_version" - # will fail. Use lctl build version instead. - log "Client: $($LCTL lustre_build_version)" - log "MDS: $(do_facet $SINGLEMDS $LCTL lustre_build_version 2>/dev/null|| - do_facet $SINGLEMDS $LCTL --version)" - log "OSS: $(do_facet ost1 $LCTL lustre_build_version 2> /dev/null || - do_facet ost1 $LCTL --version)" + # log actual client and server versions if needed for debugging + log "Client: $(lustre_build_version client)" + log "MDS: $(lustre_build_version mds1)" + log "OSS: $(lustre_build_version ost1)" } log_test() { @@ -8531,7 +8651,7 @@ mds_backup_restore() { local rcmd="do_facet $facet" local metaea=${TMP}/backup_restore.ea local metadata=${TMP}/backup_restore.tgz - local opts=${MDS_MOUNT_OPTS} + local opts=${MDS_MOUNT_FS_OPTS} local svc=${facet}_svc if ! ${rcmd} test -b ${devname}; then @@ -8588,7 +8708,7 @@ mds_remove_ois() { local devname=$(mdsdevname $(facet_number $facet)) local mntpt=$(facet_mntpt brpt) local rcmd="do_facet $facet" - local opts=${MDS_MOUNT_OPTS} + local opts=${MDS_MOUNT_FS_OPTS} if ! ${rcmd} test -b ${devname}; then opts=$(csa_add "$opts" -o loop) @@ -8630,16 +8750,19 @@ generate_logname() { test_mkdir() { local path local p_option + local hash_type + local hash_name=("all_char" "fnv_1a_64" "crush") local dirstripe_count=${DIRSTRIPE_COUNT:-"2"} local dirstripe_index=${DIRSTRIPE_INDEX:-$((base % $MDSCOUNT))} local OPTIND=1 - while getopts "c:i:p" opt; do + while getopts "c:H:i:p" opt; do case $opt in c) dirstripe_count=$OPTARG;; + H) hash_type=$OPTARG;; i) dirstripe_index=$OPTARG;; p) p_option="-p";; - \?) error "only support -i -c -p";; + \?) error "only support -c -H -i -p";; esac done @@ -8668,6 +8791,10 @@ test_mkdir() { mdt_index=$dirstripe_index fi + # randomly choose hash type + [ -z "$hash_type" ] && + hash_type=${hash_name[$((RANDOM % ${#hash_name[@]}))]} + if (($MDS1_VERSION >= $(version_code 2.8.0))); then if [ $dirstripe_count -eq -1 ]; then dirstripe_count=$((RANDOM % MDSCOUNT + 1)) @@ -8676,9 +8803,9 @@ test_mkdir() { dirstripe_count=1 fi - echo "striped dir -i$mdt_index -c$dirstripe_count $path" - $LFS mkdir -i$mdt_index -c$dirstripe_count $path || - error "mkdir -i $mdt_index -c$dirstripe_count $path failed" + echo "striped dir -i$mdt_index -c$dirstripe_count -H $hash_type $path" + $LFS mkdir -i$mdt_index -c$dirstripe_count -H $hash_type $path || + error "mkdir -i $mdt_index -c$dirstripe_count -H $hash_type $path failed" fi } @@ -8732,7 +8859,7 @@ check_file_in_pool() local file=$1 local pool=$2 local tlist="$3" - local res=$($GETSTRIPE $file | grep 0x | cut -f2) + local res=$($LFS getstripe $file | grep 0x | cut -f2) for i in $res do for t in $tlist ; do @@ -8804,7 +8931,7 @@ pool_set_dir() { local tdir=$2 echo "Setting pool on directory $tdir" - $SETSTRIPE -c 2 -p $pool $tdir && return 0 + $LFS setstripe -c 2 -p $pool $tdir && return 0 error_noexit "Cannot set pool $pool to $tdir" return 1 @@ -8815,7 +8942,7 @@ pool_check_dir() { local tdir=$2 echo "Checking pool on directory $tdir" - local res=$($GETSTRIPE --pool $tdir | sed "s/\s*$//") + local res=$($LFS getstripe --pool $tdir | sed "s/\s*$//") [ "$res" = "$pool" ] && return 0 error_noexit "Pool on '$tdir' is '$res', not '$pool'" @@ -8870,7 +8997,7 @@ pool_create_files() { for i in $(seq -w 1 $count) do local file=$tdir/spoo-$i - $SETSTRIPE -p $pool $file + $LFS setstripe -p $pool $file check_file_in_pool $file $pool "$tlist" || \ failed=$((failed + 1)) done @@ -8904,11 +9031,11 @@ pool_file_rel_path() { mkdir -p $tdir || { error_noexit "unable to create $tdir"; return 1 ; } local file="/..$tdir/$tfile-1" - $SETSTRIPE -p $pool $file || + $LFS setstripe -p $pool $file || { error_noexit "unable to create $file" ; return 2 ; } cd $tdir - $SETSTRIPE -p $pool $tfile-2 || { + $LFS setstripe -p $pool $tfile-2 || { error_noexit "unable to create $tfile-2 in $tdir" return 3 } @@ -8967,7 +9094,7 @@ pool_remove_all_targets() { return 2 } # setstripe on an empty pool should fail - $SETSTRIPE -p $pool $file 2>/dev/null && { + $LFS setstripe -p $pool $file 2>/dev/null && { error_noexit "expected failure when creating file" \ "with empty pool" return 3 @@ -8990,7 +9117,7 @@ pool_remove() { return 1 } # setstripe on an empty pool should fail - $SETSTRIPE -p $pool $file 2>/dev/null && { + $LFS setstripe -p $pool $file 2>/dev/null && { error_noexit "expected failure when creating file" \ "with missing pool" return 2 @@ -9039,7 +9166,7 @@ check_obdidx() { [[ -z "$file" || -z "$expected" ]] && error "check_obdidx: invalid argument!" - obdidx=$(comma_list $($GETSTRIPE $file | grep -A $OSTCOUNT obdidx | + obdidx=$(comma_list $($LFS getstripe $file | grep -A $OSTCOUNT obdidx | grep -v obdidx | awk '{print $1}' | xargs)) [[ $obdidx = $expected ]] || @@ -9057,8 +9184,8 @@ check_start_ost_idx() { [[ -z "$file" || -z "$expected" ]] && error "check_start_ost_idx: invalid argument!" - start_ost_idx=$($GETSTRIPE $file | grep -A 1 obdidx | grep -v obdidx | - awk '{print $1}') + start_ost_idx=$($LFS getstripe $file | grep -A 1 obdidx | + grep -v obdidx | awk '{print $1}') [[ $start_ost_idx = $expected ]] || error "OST index of the first stripe on $file is" \ @@ -9471,12 +9598,24 @@ changelog_clear() { } changelog_dump() { + local rc + for M in $(seq $MDSCOUNT); do local facet=mds$M local mdt="$(facet_svc $facet)" - - $LFS changelog $mdt | sed -e 's/^/'$mdt'./' + local output + local ret + + output=$($LFS changelog $mdt) + ret=$? + if [ $ret -ne 0 ]; then + rc=${rc:-$ret} + elif [ -n "$output" ]; then + echo "$output" | sed -e 's/^/'$mdt'./' + fi done + + return ${rc:-0} } changelog_extract_field() { @@ -9711,19 +9850,14 @@ verify_yaml_layout() { is_project_quota_supported() { $ENABLE_PROJECT_QUOTAS || return 1 - [ "$(facet_fstype $SINGLEMDS)" == "ldiskfs" ] && - [ $(lustre_version_code $SINGLEMDS) -gt \ - $(version_code 2.9.55) ] && - lfs --help | grep project >&/dev/null && - egrep -q "7." /etc/redhat-release && return 0 - if [ "$(facet_fstype $SINGLEMDS)" == "zfs" ]; then - [ $(lustre_version_code $SINGLEMDS) -le \ - $(version_code 2.10.53) ] && return 1 + [[ "$(facet_fstype $SINGLEMDS)" == "ldiskfs" && + $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.9.55) ]] && + do_facet mds1 lfs --help |& grep -q project && return 0 - do_facet mds1 $ZPOOL get all | - grep -q project_quota && return 0 - fi + [[ "$(facet_fstype $SINGLEMDS)" == "zfs" && + $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.10.53) ]] && + do_facet mds1 $ZPOOL get all | grep -q project_quota && return 0 return 1 }