X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=6ba716cf1517d1c3dacf9e06afdedbb1b74fbf0e;hp=678ff62599e92c7a9de2f3416a0a7ce426481a09;hb=4f18e08099e51b682f6acb1cf9fea6d7d45f5fd7;hpb=ede8644550462f905547d5690b6111b15aef8daa diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 678ff62..6ba716c 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -277,6 +277,8 @@ init_test_env() { [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey) export MCREATE=${MCREATE:-mcreate} export MULTIOP=${MULTIOP:-multiop} + export MMAP_CAT=${MMAP_CAT:-mmap_cat} + export STATX=${STATX:-statx} # Ubuntu, at least, has a truncate command in /usr/bin # so fully path our truncate command. export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate} @@ -453,10 +455,10 @@ check_cpt_number() { # Return a numeric version code based on a version string. The version # code is useful for comparison two version strings to see which is newer. version_code() { - # split arguments like "1.8.6-wc3" into "1", "8", "6", "wc3" - eval set -- $(tr "[:punct:]" " " <<< $*) + # split arguments like "1.8.6-wc3" into "1", "8", "6", "3" + eval set -- $(tr "[:punct:][a-z]" " " <<< $*) - echo -n "$(((${1:-0} << 16) | (${2:-0} << 8) | ${3:-0}))" + echo -n $(((${1:-0}<<24) | (${2:-0}<<16) | (${3:-0}<<8) | (${4:-0}))) } export LINUX_VERSION=$(uname -r | sed -e "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/") @@ -493,14 +495,31 @@ export LINUX_VERSION_CODE=$(version_code ${LINUX_VERSION//\./ }) # output: prints version string to stdout in (up to 4) dotted-decimal values lustre_build_version() { local facet=${1:-client} - local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null || - $LCTL lustre_build_version 2>/dev/null || - $LCTL --version 2>/dev/null | cut -d' ' -f2") + local facet_version=${facet}_VERSION + + # if the global variable is already set, then use that + [ -n "${!facet_version}" ] && echo ${!facet_version} && return + + # this is the currently-running version of the kernel modules + local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null") + # we mostly test 2.10+ systems, only try others if the above fails + if [ -z "$ver" ]; then + ver=$(do_facet $facet "$LCTL lustre_build_version 2>/dev/null") + fi + if [ -z "$ver" ]; then + ver=$(do_facet $facet "$LCTL --version 2>/dev/null" | + cut -d' ' -f2) + fi local lver=$(egrep -i "lustre: |version: " <<<"$ver" | head -n 1) [ -n "$lver" ] && ver="$lver" - sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' -e 's/_/./g' <<<$ver | - cut -d. -f1-4 + lver=$(sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' <<<$ver | + tr _ . | cut -d. -f1-4) + + # save in global variable for the future + export $facet_version=$lver + + echo $lver } # Report the Lustre numeric build version code for the supplied facet. @@ -683,6 +702,7 @@ load_modules_local() { # if there is more than 4 CPU cores, libcfs should create multiple CPU # partitions. So we just force libcfs to create 2 partitions for # system with 2 or 4 cores + local saved_opts="$MODOPTS_LIBCFS" if [ $ncpus -le 4 ] && [ $ncpus -gt 1 ]; then # force to enable multiple CPU partitions echo "Force libcfs to create 2 CPU partitions" @@ -694,7 +714,7 @@ load_modules_local() { load_module ../libcfs/libcfs/libcfs # Prevent local MODOPTS_LIBCFS being passed as part of environment # variable to remote nodes - unset MODOPTS_LIBCFS + MODOPTS_LIBCFS=$saved_opts set_default_debug load_module ../lnet/lnet/lnet @@ -718,8 +738,8 @@ load_modules_local() { load_module fid/fid load_module lmv/lmv load_module osc/osc - load_module mdc/mdc load_module lov/lov + load_module mdc/mdc load_module mgc/mgc load_module obdecho/obdecho if ! client_only; then @@ -789,16 +809,16 @@ load_modules () { } check_mem_leak () { - LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) - LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true) - if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then - echo "$LEAK_LUSTRE" 1>&2 - echo "$LEAK_PORTALS" 1>&2 - mv $TMP/debug $TMP/debug-leak.`date +%s` || true - echo "Memory leaks detected" - [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true - return 1 - fi + LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) + LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" || true) + if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then + echo "$LEAK_LUSTRE" 1>&2 + echo "$LEAK_PORTALS" 1>&2 + mv $TMP/debug $TMP/debug-leak.`date +%s` || true + echo "Memory leaks detected" + [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true + return 1 + fi } unload_modules() { @@ -1013,6 +1033,16 @@ init_gss() { start_gss_daemons || error_exit "start gss daemon failed! rc=$?" fi + if $GSS_SK && ! $SK_NO_KEY; then + echo "Loading basic SSK keys on all servers" + do_nodes $(comma_list $(all_server_nodes)) \ + "lgss_sk -t server -l $SK_PATH/$FSNAME.key || true" + do_nodes $(comma_list $(all_server_nodes)) \ + "keyctl show | grep lustre | cut -c1-11 | + sed -e 's/ //g;' | + xargs -IX keyctl setperm X 0x3f3f3f3f" + fi + if $GSS_SK && $SK_NO_KEY; then local numclients=${1:-$CLIENTCOUNT} local clients=${CLIENTS:-$HOSTNAME} @@ -1597,6 +1627,22 @@ set_default_debug_facet () { set_default_debug_nodes $node "$debug" "$subsys" $debug_size } +set_params_nodes () { + [[ $# -ge 2 ]] || return 0 + + local nodes=$1 + shift + do_nodes $nodes $LCTL set_param $@ +} + +set_params_clients () { + local clients=${1:-$CLIENTS} + local params=${2:-$CLIENT_LCTL_SETPARAM_PARAM} + + [[ -n $params ]] || return 0 + set_params_nodes $clients $params +} + set_hostid () { local hostid=${1:-$(hostid)} @@ -2199,10 +2245,11 @@ mdt_free_inodes() { ost_dev_status() { local ost_idx=$1 local mnt_pnt=${2:-$MOUNT} + local opts=$3 local ost_uuid ost_uuid=$(ostuuid_from_index $ost_idx $mnt_pnt) - lfs_df $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }' + lfs_df $opts $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }' } setup_quota(){ @@ -2308,6 +2355,7 @@ zconf_mount() { fi set_default_debug_nodes $client + set_params_clients $client return 0 } @@ -2344,7 +2392,6 @@ zconf_umount() { mount_mds_client() { local mds_HOST=${SINGLEMDS}_HOST echo $mds_HOST - do_facet $SINGLEMDS "mkdir -p $MOUNT2" zconf_mount $mds1_HOST $MOUNT2 $MOUNT_OPTS || error "unable to mount $MOUNT2 on MDS" } @@ -2353,7 +2400,7 @@ mount_mds_client() { umount_mds_client() { local mds_HOST=${SINGLEMDS}_HOST zconf_umount $mds1_HOST $MOUNT2 - do_facet $SINGLEMDS "rm -rf $MOUNT2" + do_facet $SINGLEMDS "rmdir $MOUNT2" } # nodes is comma list @@ -2535,6 +2582,7 @@ exit \\\$rc" || return ${PIPESTATUS[0]} do_nodes $clients "mount | grep $mnt' '" set_default_debug_nodes $clients + set_params_clients $clients return 0 } @@ -2668,19 +2716,26 @@ remount_facet() { reboot_facet() { local facet=$1 + local node=$(facet_active_host $facet) + if [ "$FAILURE_MODE" = HARD ]; then - reboot_node $(facet_active_host $facet) + boot_node $node else sleep 10 fi } boot_node() { - local node=$1 - if [ "$FAILURE_MODE" = HARD ]; then - reboot_node $node - wait_for_host $node - fi + local node=$1 + + if [ "$FAILURE_MODE" = HARD ]; then + reboot_node $node + wait_for_host $node + if $LOAD_MODULES_REMOTE; then + echo "loading modules on $node: $facet" + do_rpc_nodes $node load_modules_local + fi + fi } facets_hosts () { @@ -2963,54 +3018,103 @@ cleanup_check() { return 0 } -wait_update () { +## +# wait for a command to return the expected result +# +# This will run @check on @node repeatedly until the output matches @expect +# based on the supplied condition, or until @max_wait seconds have elapsed, +# whichever comes first. @cond may be one of the normal bash operators, +# "-gt", "-ge", "-eq", "-le", "-lt", "==", "!=", or "=~", and must be quoted +# in the caller to avoid unintentional evaluation by the shell in the caller. +# +# If @max_wait is not specified, the condition will be checked for up to 90s. +# +# If --verbose is passed as the first argument, the result is printed on each +# value change, otherwise it is only printed after every 10s interval. +# +# Using wait_update_cond() or related helper function is preferable to adding +# a "long enough" wait for some state to change in the background, since +# "long enough" may be too short due to tunables, system config, or running in +# a VM, and must by necessity wait too long for most cases or risk failure. +# +# usage: wait_update_cond [--verbose] node check cond expect [max_wait] +wait_update_cond() { local verbose=false - if [[ "$1" == "--verbose" ]]; then - shift - verbose=true - fi + [[ "$1" == "--verbose" ]] && verbose=true && shift local node=$1 - local TEST=$2 - local FINAL=$3 - local MAX=${4:-90} - local RESULT - local PREV_RESULT - local WAIT=0 + local check="$2" + local cond="$3" + local expect="$4" + local max_wait=${5:-90} + local result + local prev_result + local waited=0 + local begin=$SECONDS local sleep=1 local print=10 - PREV_RESULT=$(do_node $node "$TEST") - while [ true ]; do - RESULT=$(do_node $node "$TEST") - if [[ "$RESULT" == "$FINAL" ]]; then - [[ -z "$RESULT" || $WAIT -le $sleep ]] || - echo "Updated after ${WAIT}s: wanted '$FINAL'"\ - "got '$RESULT'" + while (( $waited <= $max_wait )); do + result=$(do_node $node "$check") + + eval [[ "'$result'" $cond "'$expect'" ]] + if [[ $? == 0 ]]; then + [[ -z "$result" || $waited -le $sleep ]] || + echo "Updated after ${waited}s: want '$expect' got '$result'" return 0 fi - if [[ $verbose && "$RESULT" != "$PREV_RESULT" ]]; then - echo "Changed after ${WAIT}s: from '$PREV_RESULT'"\ - "to '$RESULT'" - PREV_RESULT=$RESULT + if $verbose && [[ "$result" != "$prev_result" ]]; then + [[ -n "$prev_result" ]] && + echo "Changed after ${waited}s: from '$prev_result' to '$result'" + prev_result="$result" fi - [[ $WAIT -ge $MAX ]] && break - [[ $((WAIT % print)) -eq 0 ]] && - echo "Waiting $((MAX - WAIT)) secs for update" - WAIT=$((WAIT + sleep)) + (( $waited % $print == 0 )) && + echo "Waiting $((max_wait - waited))s for '$expect'" sleep $sleep + waited=$((SECONDS - begin)) done - echo "Update not seen after ${MAX}s: wanted '$FINAL' got '$RESULT'" + echo "Update not seen after ${max_wait}s: want '$expect' got '$result'" return 3 } +# usage: wait_update [--verbose] node check expect [max_wait] +wait_update() { + local verbose= + [ "$1" = "--verbose" ] && verbose="$1" && shift + + local node="$1" + local check="$2" + local expect="$3" + local max_wait=$4 + + wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait +} + +# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait] +wait_update_facet_cond() { + local verbose= + [ "$1" = "--verbose" ] && verbose="$1" && shift + + local node=$(facet_active_host $1) + local check="$2" + local cond="$3" + local expect="$4" + local max_wait=$5 + + wait_update_cond $verbose $node "$check" "$cond" "$expect" $max_wait +} + +# usage: wait_update_facet [--verbose] facet check expect [max_wait] wait_update_facet() { local verbose= [ "$1" = "--verbose" ] && verbose="$1" && shift - local facet=$1 - shift - wait_update $verbose $(facet_active_host $facet) "$@" + local node=$(facet_active_host $1) + local check="$2" + local expect="$3" + local max_wait=$4 + + wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait } sync_all_data() { @@ -3222,13 +3326,13 @@ wait_delete_completed_mds() { } wait_for_host() { - local hostlist=$1 + local hostlist=$1 - # we can use "for" here because we are waiting the slowest - for host in ${hostlist//,/ }; do - check_network "$host" 900 - done - while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done + # we can use "for" here because we are waiting the slowest + for host in ${hostlist//,/ }; do + check_network "$host" 900 + done + while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done } wait_for_facet() { @@ -3669,16 +3773,19 @@ fail() { } fail_nodf() { - local facet=$1 - facet_failover $facet + local facet=$1 + + facet_failover $facet } fail_abort() { local facet=$1 + local abort_type=${2:-"abort_recovery"} + stop $facet change_active $facet wait_for_facet $facet - mount_facet $facet -o abort_recovery + mount_facet $facet -o $abort_type clients_up || echo "first stat failed: $?" clients_up || error "post-failover stat: $?" } @@ -3687,7 +3794,7 @@ host_nids_address() { local nodes=$1 local net=${2:-"."} - do_nodes $nodes "$LCTL list_nids | grep $net | cut -f 1 -d @" + do_nodes $nodes "$LCTL list_nids | grep -w $net | cut -f 1 -d @" } h2name_or_ip() { @@ -4123,6 +4230,12 @@ add() { stop ${facet} -f rm -f $TMP/${facet}active [[ $facet = mds1 ]] && combined_mgs_mds && rm -f $TMP/mgsactive + + # make sure in-tree ldiskfs is loaded before mkfs + if local_mode && [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then + load_module ../ldiskfs/ldiskfs + fi + do_facet ${facet} $MKFS $* || return ${PIPESTATUS[0]} if [[ $(facet_fstype $facet) == zfs ]]; then @@ -4580,6 +4693,8 @@ mkfs_opts() { var=${type}_FS_MKFS_OPTS fs_mkfs_opts+=${!var:+" ${!var}"} + [[ "$QUOTA_TYPE" =~ "p" ]] && fs_mkfs_opts+=" -O project" + [ $fstype == ldiskfs ] && fs_mkfs_opts=$(squash_opt $fs_mkfs_opts) if [ -n "${fs_mkfs_opts## }" ]; then @@ -5218,8 +5333,9 @@ init_param_vars () { fi fi - do_nodes $(comma_list $(mdts_nodes)) \ - "$LCTL set_param lod.*.mdt_hash=crush" + (( MDS1_VERSION <= $(version_code 2.13.52) )) || + do_nodes $(comma_list $(mdts_nodes)) \ + "$LCTL set_param lod.*.mdt_hash=crush" return 0 } @@ -5376,17 +5492,18 @@ check_and_setup_lustre() { export I_MOUNTED2=yes fi - if $do_check; then - # FIXME: what to do if check_config failed? - # i.e. if: - # 1) remote client has mounted other Lustre fs? - # 2) lustre is mounted on remote_clients atall ? - check_config_clients $MOUNT - init_facets_vars - init_param_vars + if $do_check; then + # FIXME: what to do if check_config failed? + # i.e. if: + # 1) remote client has mounted other Lustre fs? + # 2) lustre is mounted on remote_clients atall ? + check_config_clients $MOUNT + init_facets_vars + init_param_vars - set_default_debug_nodes $(comma_list $(nodes_list)) - fi + set_default_debug_nodes $(comma_list $(nodes_list)) + set_params_clients + fi if [ -z "$CLIENTONLY" -a $(lower $OSD_TRACK_DECLARES_LBUG) == 'yes' ]; then local facets="" @@ -5496,6 +5613,8 @@ run_e2fsck() { local log=$TMP/e2fsck.log local rc=0 + # turn on pfsck if it is supported + do_node $node $E2FSCK -h 2>&1 | grep -qw -- -m && cmd+=" -m8" echo $cmd do_node $node $cmd 2>&1 | tee $log rc=${PIPESTATUS[0]} @@ -5689,13 +5808,10 @@ check_network() { [ "$host" = "$HOSTNAME" ] && return 0 - echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..." - if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then - echo "Network not available!" + if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep; then + echo "$(date +'%H:%M:%S (%s)') waited for $host network ${max}s" exit 1 fi - - echo "$(date +'%H:%M:%S (%s)') network interface is UP" } no_dsh() { @@ -5972,9 +6088,9 @@ cancel_lru_locks() { default_lru_size() { - NR_CPU=$(grep -c "processor" /proc/cpuinfo) - DEFAULT_LRU_SIZE=$((100 * NR_CPU)) - echo "$DEFAULT_LRU_SIZE" + local nr_cpu=$(grep -c "processor" /proc/cpuinfo) + + echo $((100 * nr_cpu)) } lru_resize_enable() @@ -5984,7 +6100,10 @@ lru_resize_enable() lru_resize_disable() { - lctl set_param ldlm.namespaces.*$1*.lru_size $(default_lru_size) + local dev=${1} + local lru_size=${2:-$(default_lru_size)} + + $LCTL set_param ldlm.namespaces.*$dev*.lru_size=$lru_size } flock_is_enabled() @@ -6139,9 +6258,14 @@ error_noexit() { exit_status () { local status=0 - local log=$TESTSUITELOG + local logs="$TESTSUITELOG $1" + + for log in $logs; do + if [ -f "$log" ]; then + grep -qw FAIL $log && status=1 + fi + done - [ -f "$log" ] && grep -qw FAIL $log && status=1 exit $status } @@ -6476,10 +6600,8 @@ run_one_logged() { local testmsg=$2 export tfile=f${testnum}.${TESTSUITE} export tdir=d${testnum}.${TESTSUITE} - local name=$TESTSUITE.$TESTNAME.test_log.$(hostname -s).log - local test_log=$LOGDIR/$name - local zfs_log_name=$TESTSUITE.$TESTNAME.zfs_log - local zfs_debug_log=$LOGDIR/$zfs_log_name + local test_log=$TESTLOG_PREFIX.$TESTNAME.test_log.$(hostname -s).log + local zfs_debug_log=$TESTLOG_PREFIX.$TESTNAME.zfs_log local SAVE_UMASK=$(umask) local rc=0 umask 0022 @@ -6519,7 +6641,7 @@ run_one_logged() { TEST_STATUS="PASS" fi - pass "$testnum" "($((SECONDS - before))s)" + pass "$testnum" "(${duration_sub}s)" log_sub_test_end $TEST_STATUS $duration_sub "$rc" "$test_error" [[ $rc != 0 ]] && break done @@ -7137,16 +7259,16 @@ multiop_bg_pause() { } do_and_time () { - local cmd=$1 - local rc - - SECONDS=0 - eval '$cmd' + local cmd="$1" + local start + local rc - [ ${PIPESTATUS[0]} -eq 0 ] || rc=1 + start=$SECONDS + eval '$cmd' + [ ${PIPESTATUS[0]} -eq 0 ] || rc=1 - echo $SECONDS - return $rc + echo $((SECONDS - start)) + return $rc } inodes_available () { @@ -7219,13 +7341,9 @@ check_node_health() { for node in ${nodes//,/ }; do check_network "$node" 5 if [ $? -eq 0 ]; then - do_node $node "rc=0; - val=\\\$($LCTL get_param -n catastrophe 2>&1); - if [[ \\\$? -eq 0 && \\\$val -ne 0 ]]; then - echo \\\$(hostname -s): \\\$val; - rc=\\\$val; - fi; - exit \\\$rc" || error "$node:LBUG/LASSERT detected" + do_node $node "$LCTL get_param catastrophe 2>&1" | + grep -q "catastrophe=1" && + error "$node:LBUG/LASSERT detected" || true fi done } @@ -8344,13 +8462,10 @@ init_logging() { umask $save_umask - # If modules are not yet loaded then older "lctl lustre_build_version" - # will fail. Use lctl build version instead. - log "Client: $($LCTL lustre_build_version)" - log "MDS: $(do_facet $SINGLEMDS $LCTL lustre_build_version 2>/dev/null|| - do_facet $SINGLEMDS $LCTL --version)" - log "OSS: $(do_facet ost1 $LCTL lustre_build_version 2> /dev/null || - do_facet ost1 $LCTL --version)" + # log actual client and server versions if needed for debugging + log "Client: $(lustre_build_version client)" + log "MDS: $(lustre_build_version mds1)" + log "OSS: $(lustre_build_version ost1)" } log_test() { @@ -8763,9 +8878,8 @@ test_mkdir() { [ $# -eq 1 ] || error "Only creating single directory is supported" path="$*" + local parent=$(dirname $path) if [ "$p_option" == "-p" ]; then - local parent=$(dirname $path) - [ -d $path ] && return 0 if [ ! -d ${parent} ]; then mkdir -p ${parent} || @@ -8773,7 +8887,7 @@ test_mkdir() { fi fi - if [ $MDSCOUNT -le 1 ]; then + if [ $MDSCOUNT -le 1 ] || ! is_lustre ${parent}; then mkdir $path || error "mkdir '$path' failed" else local mdt_index @@ -9036,10 +9150,21 @@ pool_file_rel_path() { pool_remove_first_target() { echo "Removing first target from a pool" + pool_remove_target $1 -1 +} + +pool_remove_target() { local pool=$1 + local index=$2 local pname="lov.$FSNAME-*.pools.$pool" - local t=$($LCTL get_param -n $pname | head -1) + if [ $index -eq -1 ]; then + local t=$($LCTL get_param -n $pname | head -1) + else + local t=$(printf "$FSNAME-OST%04x_UUID" $index) + fi + + echo "Removing $t from $pool" do_facet mgs $LCTL pool_remove $FSNAME.$pool $t for mds_id in $(seq $MDSCOUNT); do local mdt_id=$((mds_id-1)) @@ -9591,12 +9716,24 @@ changelog_clear() { } changelog_dump() { + local rc + for M in $(seq $MDSCOUNT); do local facet=mds$M local mdt="$(facet_svc $facet)" - - $LFS changelog $mdt | sed -e 's/^/'$mdt'./' + local output + local ret + + output=$($LFS changelog $mdt) + ret=$? + if [ $ret -ne 0 ]; then + rc=${rc:-$ret} + elif [ -n "$output" ]; then + echo "$output" | sed -e 's/^/'$mdt'./' + fi done + + return ${rc:-0} } changelog_extract_field() { @@ -9945,11 +10082,15 @@ init_agt_vars() { export SINGLEAGT=${SINGLEAGT:-agt1} export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} + export HSMTOOL_PID_FILE=${HSMTOOL_PID_FILE:-"/var/run/lhsmtool_posix.pid"} export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") + + if ! [[ $HSMTOOL =~ hsmtool ]]; then + echo "HSMTOOL = '$HSMTOOL' does not contain 'hsmtool', GLWT" >&2 + fi HSM_ARCHIVE_NUMBER=2 @@ -9983,44 +10124,26 @@ get_mdt_devices() { done } -search_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" +pkill_copytools() { + local hosts="$1" + local signal="$2" + + do_nodes "$hosts" "pkill --pidfile=$HSMTOOL_PID_FILE --signal=$signal hsmtool" } -kill_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} +copytool_continue() { + local agents=${1:-$(facet_active_host $SINGLEAGT)} - echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true + pkill_copytools "$agents" CONT || return 0 + echo "Copytool is continued on $agents" } -wait_copytools() { +kill_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - local wait_timeout=200 - local wait_start=$SECONDS - local wait_end=$((wait_start + wait_timeout)) - local sleep_time=100000 # 0.1 second - - while ((SECONDS < wait_end)); do - if ! search_copytools $hosts; then - echo "copytools stopped in $((SECONDS - wait_start))s" - return 0 - fi - - echo "copytools still running on $hosts" - usleep $sleep_time - [ $sleep_time -lt 32000000 ] && # 3.2 seconds - sleep_time=$(bc <<< "$sleep_time * 2") - done - # try to dump Copytool's stack - do_nodesv $hosts "echo 1 >/proc/sys/kernel/sysrq ; " \ - "echo t >/proc/sysrq-trigger" - - echo "copytools failed to stop in ${wait_timeout}s" - - return 1 + echo "Killing existing copytools on $hosts" + pkill_copytools "$hosts" TERM || return 0 + copytool_continue "$hosts" } copytool_monitor_cleanup() { @@ -10067,16 +10190,17 @@ __lhsmtool_import() __lhsmtool_setup() { - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root \"$hsm_root\"" + local host="$(facet_host "$facet")" + local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --pid-file=$HSMTOOL_PID_FILE --hsm-root \"$hsm_root\"" [ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth" [ -n "$archive_id" ] && cmd+=" --archive $archive_id" [ ${#misc_options[@]} -gt 0 ] && cmd+=" $(IFS=" " echo "$@")" cmd+=" \"$mountpoint\"" - echo "Starting copytool $facet on $(facet_host $facet)" - stack_trap "do_facet $facet libtool execute pkill -x '$HSMTOOL' || true" EXIT - do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" + echo "Starting copytool '$facet' on '$host'" + stack_trap "pkill_copytools $host TERM || true" EXIT + do_node "$host" "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" } hsm_root() { @@ -10237,20 +10361,15 @@ mdts_set_param() { return $rc } -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - mdts_check_param() { local key="$1" local target="$2" local timeout="$3" local mdtno + for mdtno in $(seq 1 $MDSCOUNT); do local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ + wait_update_facet --verbose mds${mdtno} \ "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ $timeout || error "$key state is not '$target' on mds${mdtno}" @@ -10301,7 +10420,7 @@ wait_request_state() { local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - wait_result $mds "$cmd" "$state" 200 || + wait_update_facet --verbose $mds "$cmd" "$state" 200 || error "request on $fid is not $state on $mds" } @@ -10399,3 +10518,58 @@ sel_layout_sanity() { check_component_count $file $comp_cnt } +statx_supported() { + $STATX --quiet --version + return $? +} + +# +# wrappers for createmany and unlinkmany +# to set debug=0 if number of creates is high enough +# this is to speedup testing +# +function createmany() { + local count=${!#} + + (( count > 100 )) && { + local saved_debug=$($LCTL get_param -n debug) + local list=$(comma_list $(all_nodes)) + + do_nodes $list $LCTL set_param debug=0 + } + $LUSTRE/tests/createmany $* + local rc=$? + (( count > 100 )) && + do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\"" + return $rc +} + +function unlinkmany() { + local count=${!#} + + (( count > 100 )) && { + local saved_debug=$($LCTL get_param -n debug) + local list=$(comma_list $(all_nodes)) + + do_nodes $list $LCTL set_param debug=0 + } + $LUSTRE/tests/unlinkmany $* + local rc=$? + (( count > 100 )) && + do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\"" + return $rc +} + +function check_for_fallocate() +{ + [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + local osts=$(comma_list $(osts_nodes)) + local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks" + local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null| + head -n 1) + + [ -n "$old_mode" ] || skip "need at least 2.13.57 for fallocate" + stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode" + do_nodes $osts $LCTL set_param $fa_mode=0 || error "set $fa_mode=0" +} +