X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=9354b38913bb6aba63bcf82cff13c07d3d34d2d0;hp=17363b16b307e2b579740045860e49edb36ede05;hb=e2cb43c409b9;hpb=814bb394843434883a94fe6432cd8c656035a3e1 diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 17363b1..9354b38 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -277,6 +277,8 @@ init_test_env() { [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey) export MCREATE=${MCREATE:-mcreate} export MULTIOP=${MULTIOP:-multiop} + export MMAP_CAT=${MMAP_CAT:-mmap_cat} + export STATX=${STATX:-statx} # Ubuntu, at least, has a truncate command in /usr/bin # so fully path our truncate command. export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate} @@ -700,6 +702,7 @@ load_modules_local() { # if there is more than 4 CPU cores, libcfs should create multiple CPU # partitions. So we just force libcfs to create 2 partitions for # system with 2 or 4 cores + local saved_opts="$MODOPTS_LIBCFS" if [ $ncpus -le 4 ] && [ $ncpus -gt 1 ]; then # force to enable multiple CPU partitions echo "Force libcfs to create 2 CPU partitions" @@ -711,7 +714,7 @@ load_modules_local() { load_module ../libcfs/libcfs/libcfs # Prevent local MODOPTS_LIBCFS being passed as part of environment # variable to remote nodes - unset MODOPTS_LIBCFS + MODOPTS_LIBCFS=$saved_opts set_default_debug load_module ../lnet/lnet/lnet @@ -735,8 +738,8 @@ load_modules_local() { load_module fid/fid load_module lmv/lmv load_module osc/osc - load_module mdc/mdc load_module lov/lov + load_module mdc/mdc load_module mgc/mgc load_module obdecho/obdecho if ! client_only; then @@ -2216,10 +2219,11 @@ mdt_free_inodes() { ost_dev_status() { local ost_idx=$1 local mnt_pnt=${2:-$MOUNT} + local opts=$3 local ost_uuid ost_uuid=$(ostuuid_from_index $ost_idx $mnt_pnt) - lfs_df $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }' + lfs_df $opts $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }' } setup_quota(){ @@ -2979,54 +2983,103 @@ cleanup_check() { return 0 } -wait_update () { +## +# wait for a command to return the expected result +# +# This will run @check on @node repeatedly until the output matches @expect +# based on the supplied condition, or until @max_wait seconds have elapsed, +# whichever comes first. @cond may be one of the normal bash operators, +# "-gt", "-ge", "-eq", "-le", "-lt", "==", "!=", or "=~", and must be quoted +# in the caller to avoid unintentional evaluation by the shell in the caller. +# +# If @max_wait is not specified, the condition will be checked for up to 90s. +# +# If --verbose is passed as the first argument, the result is printed on each +# value change, otherwise it is only printed after every 10s interval. +# +# Using wait_update_cond() or related helper function is preferable to adding +# a "long enough" wait for some state to change in the background, since +# "long enough" may be too short due to tunables, system config, or running in +# a VM, and must by necessity wait too long for most cases or risk failure. 
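+#
+# For example (illustrative only, not taken from this change), to wait up
+# to 60s for a test file to grow to at least 4096 bytes on the local
+# client, using the quoted "-ge" operator:
+#
+#     wait_update_cond --verbose $HOSTNAME \
+#         "stat -c %s $DIR/$tfile 2>/dev/null || echo 0" "-ge" 4096 60 ||
+#         error "$DIR/$tfile did not reach 4096 bytes"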
+# +# usage: wait_update_cond [--verbose] node check cond expect [max_wait] +wait_update_cond() { local verbose=false - if [[ "$1" == "--verbose" ]]; then - shift - verbose=true - fi + [[ "$1" == "--verbose" ]] && verbose=true && shift local node=$1 - local TEST=$2 - local FINAL=$3 - local MAX=${4:-90} - local RESULT - local PREV_RESULT - local WAIT=0 + local check="$2" + local cond="$3" + local expect="$4" + local max_wait=${5:-90} + local result + local prev_result + local waited=0 + local begin=$SECONDS local sleep=1 local print=10 - PREV_RESULT=$(do_node $node "$TEST") - while [ true ]; do - RESULT=$(do_node $node "$TEST") - if [[ "$RESULT" == "$FINAL" ]]; then - [[ -z "$RESULT" || $WAIT -le $sleep ]] || - echo "Updated after ${WAIT}s: wanted '$FINAL'"\ - "got '$RESULT'" + while (( $waited <= $max_wait )); do + result=$(do_node $node "$check") + + eval [[ "'$result'" $cond "'$expect'" ]] + if [[ $? == 0 ]]; then + [[ -z "$result" || $waited -le $sleep ]] || + echo "Updated after ${waited}s: want '$expect' got '$result'" return 0 fi - if [[ $verbose && "$RESULT" != "$PREV_RESULT" ]]; then - echo "Changed after ${WAIT}s: from '$PREV_RESULT'"\ - "to '$RESULT'" - PREV_RESULT=$RESULT + if $verbose && [[ "$result" != "$prev_result" ]]; then + [[ -n "$prev_result" ]] && + echo "Changed after ${waited}s: from '$prev_result' to '$result'" + prev_result="$result" fi - [[ $WAIT -ge $MAX ]] && break - [[ $((WAIT % print)) -eq 0 ]] && - echo "Waiting $((MAX - WAIT)) secs for update" - WAIT=$((WAIT + sleep)) + (( $waited % $print == 0 )) && + echo "Waiting $((max_wait - waited))s for '$expect'" sleep $sleep + waited=$((SECONDS - begin)) done - echo "Update not seen after ${MAX}s: wanted '$FINAL' got '$RESULT'" + echo "Update not seen after ${max_wait}s: want '$expect' got '$result'" return 3 } +# usage: wait_update [--verbose] node check expect [max_wait] +wait_update() { + local verbose= + [ "$1" = "--verbose" ] && verbose="$1" && shift + + local node="$1" + local check="$2" + local expect="$3" + local max_wait=$4 + + wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait +} + +# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait] +wait_update_facet_cond() { + local verbose= + [ "$1" = "--verbose" ] && verbose="$1" && shift + + local node=$(facet_active_host $1) + local check="$2" + local cond="$3" + local expect="$4" + local max_wait=$5 + + wait_update_cond $verbose $node "$check" "$cond" "$expect" $max_wait +} + +# usage: wait_update_facet [--verbose] facet check expect [max_wait] wait_update_facet() { local verbose= [ "$1" = "--verbose" ] && verbose="$1" && shift - local facet=$1 - shift - wait_update $verbose $(facet_active_host $facet) "$@" + local node=$(facet_active_host $1) + local check="$2" + local expect="$3" + local max_wait=$4 + + wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait } sync_all_data() { @@ -3703,7 +3756,7 @@ host_nids_address() { local nodes=$1 local net=${2:-"."} - do_nodes $nodes "$LCTL list_nids | grep $net | cut -f 1 -d @" + do_nodes $nodes "$LCTL list_nids | grep -w $net | cut -f 1 -d @" } h2name_or_ip() { @@ -4596,6 +4649,8 @@ mkfs_opts() { var=${type}_FS_MKFS_OPTS fs_mkfs_opts+=${!var:+" ${!var}"} + [[ "$QUOTA_TYPE" =~ "p" ]] && fs_mkfs_opts+=" -O project" + [ $fstype == ldiskfs ] && fs_mkfs_opts=$(squash_opt $fs_mkfs_opts) if [ -n "${fs_mkfs_opts## }" ]; then @@ -6153,9 +6208,14 @@ error_noexit() { exit_status () { local status=0 - local log=$TESTSUITELOG + local logs="$TESTSUITELOG $1" + + for log in $logs; do + 
if [ -f "$log" ]; then + grep -qw FAIL $log && status=1 + fi + done - [ -f "$log" ] && grep -qw FAIL $log && status=1 exit $status } @@ -6490,10 +6550,8 @@ run_one_logged() { local testmsg=$2 export tfile=f${testnum}.${TESTSUITE} export tdir=d${testnum}.${TESTSUITE} - local name=$TESTSUITE.$TESTNAME.test_log.$(hostname -s).log - local test_log=$LOGDIR/$name - local zfs_log_name=$TESTSUITE.$TESTNAME.zfs_log - local zfs_debug_log=$LOGDIR/$zfs_log_name + local test_log=$TESTLOG_PREFIX.$TESTNAME.test_log.$(hostname -s).log + local zfs_debug_log=$TESTLOG_PREFIX.$TESTNAME.zfs_log local SAVE_UMASK=$(umask) local rc=0 umask 0022 @@ -7151,16 +7209,16 @@ multiop_bg_pause() { } do_and_time () { - local cmd=$1 - local rc - - SECONDS=0 - eval '$cmd' + local cmd="$1" + local start + local rc - [ ${PIPESTATUS[0]} -eq 0 ] || rc=1 + start=$SECONDS + eval '$cmd' + [ ${PIPESTATUS[0]} -eq 0 ] || rc=1 - echo $SECONDS - return $rc + echo $((SECONDS - start)) + return $rc } inodes_available () { @@ -8770,9 +8828,8 @@ test_mkdir() { [ $# -eq 1 ] || error "Only creating single directory is supported" path="$*" + local parent=$(dirname $path) if [ "$p_option" == "-p" ]; then - local parent=$(dirname $path) - [ -d $path ] && return 0 if [ ! -d ${parent} ]; then mkdir -p ${parent} || @@ -8780,7 +8837,7 @@ test_mkdir() { fi fi - if [ $MDSCOUNT -le 1 ]; then + if [ $MDSCOUNT -le 1 ] || ! is_lustre ${parent}; then mkdir $path || error "mkdir '$path' failed" else local mdt_index @@ -9043,10 +9100,21 @@ pool_file_rel_path() { pool_remove_first_target() { echo "Removing first target from a pool" + pool_remove_target $1 -1 +} + +pool_remove_target() { local pool=$1 + local index=$2 local pname="lov.$FSNAME-*.pools.$pool" - local t=$($LCTL get_param -n $pname | head -1) + if [ $index -eq -1 ]; then + local t=$($LCTL get_param -n $pname | head -1) + else + local t=$(printf "$FSNAME-OST%04x_UUID" $index) + fi + + echo "Removing $t from $pool" do_facet mgs $LCTL pool_remove $FSNAME.$pool $t for mds_id in $(seq $MDSCOUNT); do local mdt_id=$((mds_id-1)) @@ -9968,7 +10036,6 @@ init_agt_vars() { export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") HSM_ARCHIVE_NUMBER=2 @@ -10002,44 +10069,19 @@ get_mdt_devices() { done } -search_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" +copytool_continue() { + local agents=${1:-$(facet_active_host $SINGLEAGT)} + + do_nodesv $agents "libtool execute pkill -CONT -x $HSMTOOL" || return 0 + echo "Copytool is continued on $agents" } kill_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true -} - -wait_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - local wait_timeout=200 - local wait_start=$SECONDS - local wait_end=$((wait_start + wait_timeout)) - local sleep_time=100000 # 0.1 second - - while ((SECONDS < wait_end)); do - if ! 
search_copytools $hosts; then - echo "copytools stopped in $((SECONDS - wait_start))s" - return 0 - fi - - echo "copytools still running on $hosts" - usleep $sleep_time - [ $sleep_time -lt 32000000 ] && # 3.2 seconds - sleep_time=$(bc <<< "$sleep_time * 2") - done - - # try to dump Copytool's stack - do_nodesv $hosts "echo 1 >/proc/sys/kernel/sysrq ; " \ - "echo t >/proc/sysrq-trigger" - - echo "copytools failed to stop in ${wait_timeout}s" - - return 1 + do_nodesv $hosts "libtool execute killall -q $HSMTOOL" || true + copytool_continue "$hosts" } copytool_monitor_cleanup() { @@ -10256,20 +10298,15 @@ mdts_set_param() { return $rc } -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - mdts_check_param() { local key="$1" local target="$2" local timeout="$3" local mdtno + for mdtno in $(seq 1 $MDSCOUNT); do local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ + wait_update_facet --verbose mds${mdtno} \ "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ $timeout || error "$key state is not '$target' on mds${mdtno}" @@ -10320,7 +10357,7 @@ wait_request_state() { local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - wait_result $mds "$cmd" "$state" 200 || + wait_update_facet --verbose $mds "$cmd" "$state" 200 || error "request on $fid is not $state on $mds" } @@ -10418,3 +10455,44 @@ sel_layout_sanity() { check_component_count $file $comp_cnt } +statx_supported() { + $STATX --quiet --version + return $? +} + +# +# wrappers for createmany and unlinkmany +# to set debug=0 if number of creates is high enough +# this is to speedup testing +# +function createmany() { + local count=${!#} + + (( count > 100 )) && { + local saved_debug=$($LCTL get_param -n debug) + local list=$(comma_list $(all_nodes)) + + do_nodes $list $LCTL set_param debug=0 + } + $LUSTRE/tests/createmany $* + local rc=$? + (( count > 100 )) && + do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\"" + return $rc +} + +function unlinkmany() { + local count=${!#} + + (( count > 100 )) && { + local saved_debug=$($LCTL get_param -n debug) + local list=$(comma_list $(all_nodes)) + + do_nodes $list $LCTL set_param debug=0 + } + $LUSTRE/tests/unlinkmany $* + local rc=$? + (( count > 100 )) && + do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\"" + return $rc +}
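
Illustrative usage of the new createmany/unlinkmany wrappers (a sketch, not
part of the patch; $DIR and $tfile are assumed to be set by the framework as
usual): tests keep calling the wrappers exactly as they called the binaries,
and whenever the trailing count argument exceeds 100 the wrappers disable
Lustre debugging on all nodes for the duration of the run and restore the
saved debug mask afterwards:

	# create and remove 1000 files; debug=0 is set cluster-wide first and
	# the previous debug mask is restored once each command returns
	createmany -o $DIR/$tfile- 1000 || error "createmany failed"
	unlinkmany $DIR/$tfile- 1000 || error "unlinkmany failed"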