X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=b234123a3d54750deec88a2dfa45b885ab197de8;hp=38e235b500971d9cb790206c73dd616f378b1978;hb=66b3e74bccf1451d135b7f331459b6af1c06431b;hpb=470bdeec6ca5b4c68f456a10d68511653e67b378 diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 38e235b..b234123 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -7,41 +7,63 @@ set -e set +o monitor -SRCDIR=$(dirname $0) -export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin - ONLY=${ONLY:-"$*"} -# bug number for skipped test: -# LU-4684 -ALWAYS_EXCEPT="406 $SANITY_HSM_EXCEPT" -# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! - -LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +LUSTRE=${LUSTRE:-$(dirname $0)/..} . $LUSTRE/tests/test-framework.sh -init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_test_env "$@" init_logging -MULTIOP=${MULTIOP:-multiop} +ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT " +if $SHARED_KEY; then +# bug number for skipped tests: LU-9795 + ALWAYS_EXCEPT+=" 402b " +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
+fi + +# Skip tests for PPC that fail frequently +if [[ $(uname -m) = ppc64 ]]; then + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 1a 1b 1d 1e 12c 12f " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 12g 12h 12m 12n 12o 12p " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 12q 21 22 23 24a 24b " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 24d 24e 24f 25b 30c 37 " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 57 58 90 110b 111b 113 " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 222b 222d 228 260a 260b 260c " + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 220A 220a 221 222a 222c 223a " + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 223b 224A 224a 226 227 600" + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 601 602 603 604 605 " +fi + +build_test_filter + +[ -n "$FILESET" ] && skip "Not functional for FILESET set" + OPENFILE=${OPENFILE:-openfile} -MMAP_CAT=${MMAP_CAT:-mmap_cat} MOUNT_2=${MOUNT_2:-"yes"} FAIL_ON_ERROR=false # script only handles up to 10 MDTs (because of MDT_PREFIX) [ $MDSCOUNT -gt 9 ] && - error "script cannot handle more than 9 MDTs, please fix" && exit + error "script cannot handle more than 9 MDTs, please fix" check_and_setup_lustre -if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then - skip_env "Need MDS version at least 2.4.53" && exit +if [[ $MDS1_VERSION -lt $(version_code 2.4.53) ]]; then + skip_env "Need MDS version at least 2.4.53" fi # $RUNAS_ID may get set incorrectly somewhere else if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then - skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" 
&& exit + skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" fi check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS if getent group nobody; then @@ -52,8 +74,6 @@ else error "No generic nobody group" fi -build_test_filter - # if there is no CLIENT1 defined, some tests can be ran on localhost CLIENT1=${CLIENT1:-$HOSTNAME} # if CLIENT2 doesn't exist then use CLIENT1 instead @@ -62,98 +82,9 @@ CLIENT1=${CLIENT1:-$HOSTNAME} # Exception is the test which need two separate nodes CLIENT2=${CLIENT2:-$CLIENT1} -# -# In order to test multiple remote HSM agents, a new facet type named "AGT" and -# the following associated variables are added: -# -# AGTCOUNT: number of agents -# AGTDEV{N}: target HSM mount point (root path of the backend) -# agt{N}_HOST: hostname of the agent agt{N} -# SINGLEAGT: facet of the single agent -# -# The number of agents is initialized as the number of remote client nodes. -# By default, only single copytool is started on a remote client/agent. If there -# was no remote client, then the copytool will be started on the local client. -# -init_agt_vars() { - local n - local agent - - export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))} - [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1 - - export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP} - if [[ $CLIENTCOUNT -gt 1 ]] && - ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then - skip_env "SHARED_DIRECTORY should be accessible"\ - "on all client nodes" - exit 0 - fi - - # We used to put the HSM archive in $SHARED_DIRECTORY but that - # meant NFS issues could hose sanity-hsm sessions. So now we - # use $TMP instead. 
- for n in $(seq $AGTCOUNT); do - eval export AGTDEV$n=\$\{AGTDEV$n:-"$TMP/arc$n"\} - agent=CLIENT$((n + 1)) - if [[ -z "${!agent}" ]]; then - [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 || - agent=CLIENT2 - fi - eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\} - done - - export SINGLEAGT=${SINGLEAGT:-agt1} - - export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} - export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} - export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} - export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} - export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") - - HSM_ARCHIVE_NUMBER=2 - - # The test only support up to 10 MDTs - MDT_PREFIX="mdt.$FSNAME-MDT000" - HSM_PARAM="${MDT_PREFIX}0.hsm" - - # archive is purged at copytool setup - HSM_ARCHIVE_PURGE=true - - # Don't allow copytool error upon start/setup - HSMTOOL_NOERROR=false -} - -# Get the backend root path for the given agent facet. -copytool_device() { - local facet=$1 - local dev=AGTDEV$(facet_number $facet) - - echo -n ${!dev} -} - -get_mdt_devices() { - local mdtno - # get MDT device for each mdc - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - MDT[$idx]=$($LCTL get_param -n \ - mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid | - awk '{gsub(/_UUID/,""); print $1}' | head -n1) - done -} - search_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" -} - -kill_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - - echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true + do_nodesv $hosts "pgrep --pidfile=$HSMTOOL_PID_FILE hsmtool" } wait_copytools() { @@ -161,7 +92,7 @@ wait_copytools() { local wait_timeout=200 local wait_start=$SECONDS local wait_end=$((wait_start + wait_timeout)) - local sleep_time=100000 # 0.1 second + local sleep_time=1 while ((SECONDS < wait_end)); do if ! 
search_copytools $hosts; then @@ -170,9 +101,8 @@ wait_copytools() { fi echo "copytools still running on $hosts" - usleep $sleep_time - [ $sleep_time -lt 32000000 ] && # 3.2 seconds - sleep_time=$(bc <<< "$sleep_time * 2") + sleep $sleep_time + [ $sleep_time -lt 5 ] && sleep_time=$((sleep_time + 1)) done # try to dump Copytool's stack @@ -199,45 +129,17 @@ copytool_monitor_setup() { cmd="cat $test_dir/fifo > $test_dir/events &" cmd+=" echo \\\$! > $test_dir/monitor_pid" - if [[ $PDSH == *Rmrsh* ]]; then - # This is required for pdsh -Rmrsh and its handling of remote - # shells. - # Regular ssh and pdsh -Rssh work fine without this - # backgrounded subshell nonsense. - (do_node $agent "$cmd") & - export HSMTOOL_MONITOR_PDSH=$! + # This background subshell nonsense is required when pdsh/ssh decides + # to wait for the cat process to exit on the remote client + (do_node $agent "$cmd") & + export HSMTOOL_MONITOR_PDSH=$! - # Slightly racy, but just making a best-effort to catch obvious - # problems. - sleep 1 - ps -p $HSMTOOL_MONITOR_PDSH > /dev/null || - error "Failed to start copytool monitor on $agent" - else - do_node $agent "$cmd" - if [ $? != 0 ]; then - error "Failed to start copytool monitor on $agent" - fi - fi -} - -copytool_monitor_cleanup() { - local facet=${1:-$SINGLEAGT} - local agent=$(facet_active_host $facet) - - if [ -n "$HSMTOOL_MONITOR_DIR" ]; then - # Should die when the copytool dies, but just in case. - local cmd="kill \\\$(cat $HSMTOOL_MONITOR_DIR/monitor_pid)" - cmd+=" 2>/dev/null || true" - do_node $agent "$cmd" - do_node $agent "rm -fr $HSMTOOL_MONITOR_DIR" - export HSMTOOL_MONITOR_DIR= - fi - - # The pdsh should die on its own when the monitor dies. Just - # in case, though, try to clean up to avoid any cruft. - if [ -n "$HSMTOOL_MONITOR_PDSH" ]; then - kill $HSMTOOL_MONITOR_PDSH 2>/dev/null || true - export HSMTOOL_MONITOR_PDSH= + # Slightly racy, but just making a best-effort to catch obvious + # problems. 
+ sleep 1 + do_node $agent "stat $HSMTOOL_MONITOR_DIR/monitor_pid 2>&1 > /dev/null" + if [ $? != 0 ]; then + error "Failed to start copytool monitor on $agent" fi } @@ -245,139 +147,14 @@ fid2archive() { local fid="$1" - case "$HSMTOOL" in - lhsmtool_posix) - printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid" - ;; - esac -} - -copytool_logfile() -{ - local host="$(facet_host "$1")" - local prefix=$TESTLOG_PREFIX - [ -n "$TESTNAME" ] && prefix+=.$TESTNAME - - printf "${prefix}.copytool${archive_id}_log.${host}.log" -} - -__lhsmtool_rebind() -{ - do_facet $facet $HSMTOOL -p "$hsm_root" --rebind "$@" "$mountpoint" -} - -__lhsmtool_import() -{ - mkdir -p "$(dirname "$2")" || - error "cannot create directory '$(dirname "$2")'" - do_facet $facet $HSMTOOL -p "$hsm_root" --import "$@" "$mountpoint" -} - -__lhsmtool_setup() -{ - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root \"$hsm_root\"" - [ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth" - [ -n "$archive_id" ] && cmd+=" --archive $archive_id" - [ ${#misc_options[@]} -gt 0 ] && - cmd+=" $(IFS=" " echo "$@")" - cmd+=" \"$mountpoint\"" - - echo "Starting copytool $facet on $(facet_host $facet)" - stack_trap "do_facet $facet \"pkill -x $HSMTOOL_BASE\" || true" EXIT - do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" -} - -hsm_root() { - local facet="${1:-$SINGLEAGT}" - - printf "$(copytool_device "$facet")/${TESTSUITE}.${TESTNAME}/" -} - -# Main entry point to perform copytool related operations -# -# Sub-commands: -# -# setup setup a copytool to run in the background, that copytool will be -# killed on EXIT -# import import a file from an HSM backend -# rebind rebind an archived file to a new fid -# -# Although the semantics might suggest otherwise, one does not need to 'setup' -# a copytool before a call to 'copytool import' or 'copytool rebind'. 
-# -copytool() -{ - local action=$1 - shift - - # Parse arguments - local fail_on_error=true - local -a misc_options - while [ $# -gt 0 ]; do - case "$1" in - -f|--facet) - shift - local facet="$1" - ;; - -m|--mountpoint) - shift - local mountpoint="$1" - ;; - -a|--archive-id) - shift - local archive_id="$1" + case "$HSMTOOL_ARCHIVE_FORMAT" in + v1) + printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid" ;; - -b|--bwlimit) - shift - local bandwidth="$1" # in MB/s + v2) + printf "%s" "$(hsm_root)/*/$fid" ;; - -n|--no-fail) - local fail_on_error=false - ;; - *) - # Uncommon(/copytool dependent) option - misc_options+=("$1") - ;; - esac - shift - done - - # Use default values if needed - local facet=${facet:-$SINGLEAGT} - local mountpoint="${mountpoint:-${MOUNT2:-$MOUNT}}" - local hsm_root="$(hsm_root "$facet")" - - stack_trap "do_facet $facet rm -rf '$hsm_root'" EXIT - do_facet $facet mkdir -p "$hsm_root" || - error "mkdir '$hsm_root' failed" - - case "$HSMTOOL" in - lhsmtool_posix) - local copytool=lhsmtool - ;; esac - - __${copytool}_${action} "${misc_options[@]}" - if [ $? 
-ne 0 ]; then - local error_msg - - case $action in - setup) - local host="$(facet_host $facet)" - error_msg="Failed to start copytool $facet on '$host'" - ;; - import) - local src="${misc_options[0]}" - local dest="${misc_options[1]}" - error_msg="Failed to import '$src' to '$dest'" - ;; - rebind) - error_msg="could not rebind file" - ;; - esac - - $fail_on_error && error "$error_msg" || echo "$error_msg" - fi } get_copytool_event_log() { @@ -394,17 +171,11 @@ get_copytool_event_log() { copytool_suspend() { local agents=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0 + stack_trap "pkill_copytools $agents CONT || true" EXIT + pkill_copytools $agents STOP || return 0 echo "Copytool is suspended on $agents" } -copytool_continue() { - local agents=${1:-$(facet_active_host $SINGLEAGT)} - - do_nodesv $agents "pkill -CONT -x $HSMTOOL_BASE" || return 0 - echo "Copytool is continued on $agents" -} - copytool_remove_backend() { local fid=$1 local be=$(do_facet $SINGLEAGT find "$(hsm_root)" -name $fid) @@ -500,55 +271,12 @@ copy2archive() { error "cannot copy '$1' to '$file'" } -mdts_set_param() { - local arg=$1 - local key=$2 - local value=$3 - local mdtno - local rc=0 - if [[ "$value" != "" ]]; then - value="=$value" - fi - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local facet=mds${mdtno} - # if $arg include -P option, run 1 set_param per MDT on the MGS - # else, run set_param on each MDT - [[ $arg = *"-P"* ]] && facet=mgs - do_facet $facet $LCTL set_param $arg mdt.${MDT[$idx]}.$key$value - [[ $? 
!= 0 ]] && rc=1 - done - return $rc -} - -mdts_check_param() { - local key="$1" - local target="$2" - local timeout="$3" - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ - "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ - $timeout || - error "$key state is not '$target' on mds${mdtno}" - done -} - get_hsm_param() { local param=$1 local val=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.$param) echo $val } -set_hsm_param() { - local param=$1 - local value=$2 - local opt=$3 - mdts_set_param "$opt -n" "hsm.$param" "$value" - return $? -} - set_test_state() { local cmd=$1 local target=$2 @@ -556,15 +284,6 @@ set_test_state() { mdts_check_param hsm_control "$target" 10 } -cdt_set_sanity_policy() { - if [[ "$CDT_POLICY_HAD_CHANGED" ]] - then - # clear all - mdts_set_param "" hsm.policy "+NRA" - mdts_set_param "" hsm.policy "-NBR" - CDT_POLICY_HAD_CHANGED= - fi -} cdt_set_no_retry() { mdts_set_param "" hsm.policy "+NRA" @@ -590,21 +309,6 @@ cdt_clear_mount_state() { mdts_set_param "-P -d" hsm_control "" } -cdt_set_mount_state() { - mdts_set_param "-P" hsm_control "$1" - # set_param -P is asynchronous operation and could race with set_param. - # In such case configs could be retrieved and applied at mgc after - # set_param -P completion. Sleep here to avoid race with set_param. - # We need at least 20 seconds. 10 for mgc_requeue_thread to wake up - # MGC_TIMEOUT_MIN_SECONDS + MGC_TIMEOUT_RAND_CENTISEC(5 + 5) - # and 10 seconds to retrieve config from server. 
- sleep 20 -} - -cdt_check_state() { - mdts_check_param hsm_control "$1" 20 -} - cdt_disable() { set_test_state disabled disabled } @@ -627,37 +331,6 @@ cdt_restart() { cdt_set_sanity_policy } -needclients() { - local client_count=$1 - if [[ $CLIENTCOUNT -lt $client_count ]]; then - skip "Need $client_count or more clients, have $CLIENTCOUNT" - return 1 - fi - return 0 -} - -path2fid() { - $LFS path2fid $1 | tr -d '[]' - return ${PIPESTATUS[0]} -} - -get_hsm_flags() { - local f=$1 - local u=$2 - local st - - if [[ $u == "user" ]]; then - st=$($RUNAS $LFS hsm_state $f) - else - u=root - st=$($LFS hsm_state $f) - fi - - [[ $? == 0 ]] || error "$LFS hsm_state $f failed (run as $u)" - - st=$(echo $st | cut -f 2 -d" " | tr -d "()," ) - echo $st -} get_hsm_archive_id() { local f=$1 @@ -669,14 +342,6 @@ get_hsm_archive_id() { echo $ar } -check_hsm_flags() { - local f=$1 - local fl=$2 - - local st=$(get_hsm_flags $f) - [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl" -} - check_hsm_flags_user() { local f=$1 local fl=$2 @@ -713,44 +378,6 @@ delete_large_files() { wait_delete_completed } -make_custom_file_for_progress() { - local count=${2:-"39"} - local bs=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1) - bs=${3:-$bs} - - [[ $count -gt 0 ]] || error "Invalid file size" - [[ $bs -gt 0 ]] || error "Invalid stripe size" - - if ! create_file "${1/$DIR/$DIR2}" $bs $count fsync; then - echo "The creation of '${1/$DIR/$DIR2}' failed" >&2 - echo "It might be due to a lack of space in the filesystem" >&2 - delete_large_files >&2 - create_file "${1/$DIR/$DIR2}" $bs $count fsync || - file_creation_failure dd "${1/$DIR/$DIR2}" $? 
- fi -} - -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - -wait_request_state() { - local fid=$1 - local request=$2 - local state=$3 - # 4th arg (mdt index) is optional - local mdtidx=${4:-0} - local mds=mds$(($mdtidx + 1)) - - local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" - cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - - wait_result $mds "$cmd" $state 200 || - error "request on $fid is not $state on $mds" -} - get_request_state() { local fid=$1 local request=$2 @@ -784,7 +411,7 @@ wait_all_done() { [[ -n $fid ]] && cmd+=" | grep '$fid'" cmd+=" | egrep 'WAITING|STARTED'" - wait_result $SINGLEMDS "$cmd" "" $timeout || + wait_update_facet --verbose mds1 "$cmd" "" $timeout || error "requests did not complete" } @@ -863,7 +490,8 @@ get_agent_uuid() { # Lustre mount-point is mandatory and last parameter on # copytool cmd-line. - local mntpnt=$(do_rpc_nodes $agent ps -C $HSMTOOL_BASE -o args= | + local mntpnt=$(do_rpc_nodes $agent \ + pgrep --pidfile=$HSMTOOL_PID_FILE --list-full hsmtool | awk '{print $NF}') [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ "parameter" @@ -930,6 +558,8 @@ test_1A() { # was test_1 run_test 1A "lfs hsm flags root/non-root access" test_1a() { + mkdir_on_mdt0 $DIR/$tdir + local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) @@ -947,11 +577,8 @@ test_1a() { } run_test 1a "mmap & cat a HSM released file" -test_1b() { - mkdir -p $DIR/$tdir - $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || - error "failed to set default stripe" - local f=$DIR/$tdir/$tfile +test_1bde_base() { + local f=$1 rm -f $f dd if=/dev/urandom of=$f bs=1M count=1 conv=sync || @@ -975,6 +602,15 @@ test_1b() { echo "verify restored state: " check_hsm_flags $f "0x00000009" && echo "pass" } + +test_1b() { + mkdir_on_mdt0 $DIR/$tdir + $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || + error "failed to set default stripe" 
+ local f=$DIR/$tdir/$tfile + + test_1bde_base $f +} run_test 1b "Archive, Release and Restore composite file" test_1c() { @@ -1003,10 +639,26 @@ test_1c() { [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" - # Test whether setting archive number > 32 results in error. - $LFS hsm_set --exists --archive-id 33 $f && - error "archive number is larger than 32" - check_hsm_flags_user $f "0x00000001" + LOCAL_HSM_ARCHIVE_NUMBER=33 + if [ "$CLIENT_VERSION" -ge $(version_code 2.11.56) ] && + [ "$MDS1_VERSION" -ge $(version_code 2.11.56) ]; then + # lustre in the new version supports unlimited archiveID. + # Test whether setting archive number > 32 is supported + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f || + error "archive ID $LOCAL_HSM_ARCHIVE_NUMBER too large?" + check_hsm_flags_user $f "0x00000001" + + echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER" + st=$(get_hsm_archive_id $f) + [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || + error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" + else + # old client or old mds can only support at most 32 archiveID + # test whether setting archive number > 32 results in error. + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f && + error "bitmap archive number is larger than 32" + check_hsm_flags_user $f "0x00000001" + fi # Test whether setting archive number 16 and archived flag. 
LOCAL_HSM_ARCHIVE_NUMBER=16 @@ -1022,38 +674,61 @@ test_1c() { run_test 1c "Check setting archive-id in lfs hsm_set" test_1d() { - mkdir -p $DIR/$tdir + [ $MDS1_VERSION -lt $(version_code 2.10.59) ] && + skip "need MDS version at least 2.10.59" + + mkdir_on_mdt0 $DIR/$tdir $LFS setstripe -E 1M -L mdt -E -1 -c 2 $DIR/$tdir || error "failed to set default stripe" local f=$DIR/$tdir/$tfile - rm -f $f - dd if=/dev/urandom of=$f bs=1M count=1 conv=sync || - error "failed to create file" - local fid=$(path2fid $f) + test_1bde_base $f +} +run_test 1d "Archive, Release and Restore DoM file" - copytool setup +test_1e() { + [ "$MDS1_VERSION" -lt $(version_code $SEL_VER) ] && + skip "skipped for lustre < $SEL_VER" - echo "archive $f" - $LFS hsm_archive $f || error "could not archive file" - wait_request_state $fid ARCHIVE SUCCEED + mkdir_on_mdt0 $DIR/$tdir + $LFS setstripe -E 1G -z 64M -E 10G -z 512M -E -1 -z 1G $DIR/$tdir || + error "failed to set default stripe" + local comp_file=$DIR/$tdir/$tfile - echo "release $f" - $LFS hsm_release $f || error "could not release file" - echo "verify released state: " - check_hsm_flags $f "0x0000000d" && echo "pass" + test_1bde_base $comp_file - echo "restore $f" - $LFS hsm_restore $f || error "could not restore file" - wait_request_state $fid RESTORE SUCCEED - echo "verify restored state: " - check_hsm_flags $f "0x00000009" && echo "pass" + local flg_opts="--comp-start 0 -E 64M --comp-flags init" + local found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "1st component not found" + + flg_opts="--comp-start 64M -E 1G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "2nd component not found" + + flg_opts="--comp-start 1G -E 1G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "3rd component not found" + + flg_opts="--comp-start 1G -E 10G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ 
$found -eq 1 ] || error "4th component not found" + + flg_opts="--comp-start 10G -E 10G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "5th component not found" + + flg_opts="--comp-start 10G -E EOF --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "6th component not found" + + sel_layout_sanity $comp_file 6 } -run_test 1d "Archive, Release and Restore DoM file" +run_test 1e "Archive, Release and Restore SEL file" test_2() { local f=$DIR/$tdir/$tfile + mkdir_on_mdt0 $DIR/$tdir create_empty_file "$f" # New files are not dirty check_hsm_flags $f "0x00000000" @@ -1080,7 +755,7 @@ test_2() { run_test 2 "Check file dirtyness when doing setattr" test_3() { - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir f=$DIR/$tdir/$tfile # New files are not dirty @@ -1148,7 +823,7 @@ test_8() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) $LFS hsm_archive $f @@ -1168,7 +843,8 @@ test_9A() { # was test_9 local uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) check_agent_registered $uuid - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir + local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) $LFS hsm_archive --archive $archive_id $f @@ -1190,6 +866,8 @@ test_9a() { copytool setup --facet agt$n done + mkdir_on_mdt0 $DIR/$tdir + # archive files for n in $(seq $AGTCOUNT); do file=$DIR/$tdir/$tfile.$n @@ -1206,6 +884,7 @@ test_10a() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) @@ -1234,7 +913,7 @@ test_10b() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive $f || error "archive request failed" @@ -1264,7 +943,7 @@ 
test_10d() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive $f || error "cannot archive $f" @@ -1278,7 +957,7 @@ test_10d() { run_test 10d "Archive a file on the default archive id" test_11a() { - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile @@ -1292,7 +971,7 @@ test_11a() { echo "Verifying imported size $LSZ=$ASZ" [[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ" echo -n "Verifying released pattern: " - local PTRN=$($GETSTRIPE -L $f) + local PTRN=$($LFS getstripe -L $f) echo $PTRN [[ $PTRN == released ]] || error "Is not released" local fid=$(path2fid $f) @@ -1307,7 +986,7 @@ test_11b() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $f || @@ -1325,11 +1004,23 @@ test_11b() { } run_test 11b "Import a deleted file using its FID" +test_11c() { + pool_add $TESTNAME || error "Pool creation failed" + pool_add_targets $TESTNAME 1 1 || error "pool_add_targets failed" + + mkdir -p $DIR/$tdir + $LFS setstripe -p "$TESTNAME" $DIR/$tdir + + copy2archive /etc/hosts $tdir/$tfile + copytool import $tdir/$tfile $DIR/$tdir/$tfile +} +run_test 11c "Import a file to a directory with a pool" + test_12a() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile @@ -1355,7 +1046,7 @@ test_12b() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile @@ -1381,11 +1072,9 @@ test_12c() { copytool setup local f=$DIR/$tdir/$tfile - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir $LFS setstripe -c 2 "$f" - local fid - 
fid=$(make_custom_file_for_progress $f 5) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 5) local FILE_CRC=$(md5sum $f) @@ -1403,7 +1092,7 @@ test_12d() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) @@ -1427,7 +1116,7 @@ test_12e() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive $f || error "archive request failed" @@ -1447,7 +1136,7 @@ test_12f() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) @@ -1470,7 +1159,7 @@ test_12g() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) @@ -1494,7 +1183,7 @@ test_12h() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) @@ -1516,7 +1205,7 @@ test_12m() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) $LFS hsm_archive $f || error "archive of $f failed" @@ -1551,7 +1240,7 @@ test_12o() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) @@ -1600,7 +1289,7 @@ test_12p() { # test needs a running copytool copytool setup - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) @@ -1615,8 +1304,8 @@ test_12p() { run_test 12p "implicit restore of a file on copytool mount point" test_12q() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.58) ] 
&& - skip "need MDS version at least 2.7.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.58) ] && + skip "need MDS version at least 2.7.58" stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT zconf_mount $(facet_host $SINGLEAGT) $MOUNT3 || @@ -1625,6 +1314,8 @@ test_12q() { # test needs a running copytool copytool setup -m "$MOUNT3" + mkdir_on_mdt0 $DIR/$tdir + local f=$DIR/$tdir/$tfile local f2=$DIR2/$tdir/$tfile local fid=$(create_small_file $f) @@ -1673,6 +1364,57 @@ test_12q() { } run_test 12q "file attributes are refreshed after restore" +test_12r() { + # test needs a running copytool + copytool setup + + mkdir_on_mdt0 $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/hosts $f) + + $LFS hsm_archive $f || error "archive of $f failed" + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f || error "release of $f failed" + + offset=$(lseek_test -d 7 $f) + + # we check we had a restore done + wait_request_state $fid RESTORE SUCCEED + [[ $offset == 7 ]] || error "offset $offset != 7" +} +run_test 12r "lseek restores released file" + +test_12s() { + local f=$DIR/$tdir/$tfile + local fid + local pid1 pid2 + + (( MDS1_VERSION >= $(version_code 2.15.50) )) || + skip "Need MDS version newer than 2.15.50" + + # test needs a running copytool + copytool setup + + mkdir_on_mdt0 $DIR/$tdir + fid=$(copy_file /etc/hosts $f) + + $LFS hsm_archive $f || error "archive of $f failed" + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f || error "release of $f failed" + +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_HSM_RESTORE_RACE 0x8000018b + do_facet mds1 $LCTL set_param fail_loc=0x8000018b + cat $f > /dev/null & pid1=$! + cat $f > /dev/null & pid2=$! 
+ + wait $pid1 || error "cat process 1 fail (pid: $pid1)" + wait $pid2 || error "cat process 2 fail (pid: $pid2)" + + # Race exists if more than 1 restore requests is registered + assert_request_count $fid RESTORE 1 +} +run_test 12s "race between restore requests" + test_13() { local -i i j k=0 for i in {1..10}; do @@ -1709,6 +1451,8 @@ test_14() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir + # archive a file local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) @@ -1737,6 +1481,8 @@ test_15() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir + # archive files local f=$DIR/$tdir/$tfile local count=5 @@ -1789,7 +1535,7 @@ test_16() { local goal=20 dd if=/dev/zero of=$ref bs=1M count=20 - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file $ref $f) rm $ref @@ -1835,6 +1581,7 @@ run_test 20 "Release is not permitted" test_21() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/test_release @@ -1844,7 +1591,7 @@ test_21() { # LU-4388/LU-4389 - ZFS does not report full number of blocks # used until file is flushed to disk - if [ $(facet_fstype ost1) == "zfs" ]; then + if [ "$ost1_FSTYPE" == "zfs" ]; then # this causes an OST_SYNC rpc to be sent dd if=/dev/zero of=$f bs=512 count=1 oflag=sync conv=notrunc,fsync # clear locks to reread file data @@ -1901,6 +1648,7 @@ run_test 21 "Simple release tests" test_22() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/test_release local swap=$DIR/$tdir/test_swap @@ -1926,6 +1674,7 @@ run_test 22 "Could not swap a release file" test_23() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/test_mtime @@ -1962,6 +1711,7 @@ test_24a() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir fid=$(create_small_file $file) @@ -2064,6 +1814,7 @@ test_24b() { # Test needs a running 
copytool. copytool setup + mkdir_on_mdt0 $DIR/$tdir # Check that root can do HSM actions on a regular user's file. fid=$(create_small_file $file) @@ -2109,8 +1860,7 @@ test_24c() { # test needs a running copytool copytool setup - - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir # Save the default masks and check that cleanup_24c will # restore the request masks correctly. @@ -2172,6 +1922,8 @@ test_24d() { local fid1 local fid2 + mkdir_on_mdt0 $DIR/$tdir + fid1=$(create_small_file $file1) echo $fid1 @@ -2215,6 +1967,7 @@ run_test 24d "check that read-only mounts are respected" test_24e() { copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid @@ -2234,6 +1987,7 @@ run_test 24e "tar succeeds on HSM released files" # LU-6213 test_24f() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile @@ -2255,6 +2009,33 @@ test_24f() { } run_test 24f "root can archive, release, and restore tar files" +test_24g() { + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local file=$DIR/$tdir/$tfile + local fid + + echo "RUNAS = '$RUNAS'" + + copytool setup + + mkdir_on_mdt0 $DIR/$tdir + chmod ugo+rwx $DIR/$tdir + + echo "Please listen carefully as our options have changed." | tee $file + fid=$(path2fid $file) + chmod ugo+rw $file + + $LFS hsm_archive $file + wait_request_state $fid ARCHIVE SUCCEED + check_hsm_flags $file 0x00000009 # exists archived + + echo "To be electrocuted by your telephone, press #." 
| $RUNAS tee $file + check_hsm_flags $file 0x0000000b # exists dirty archived +} +run_test 24g "write by non-owner still sets dirty" # LU-11369 + test_25a() { # test needs a running copytool copytool setup @@ -2280,7 +2061,7 @@ test_25b() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -2301,11 +2082,10 @@ run_test 25b "Restore lost file (HS_LOST flag) after release"\ test_26A() { # was test_26 # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2323,8 +2103,8 @@ test_26a() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -2373,8 +2153,8 @@ run_test 26a "Remove Archive On Last Unlink (RAoLU) policy" test_26b() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -2405,8 +2185,8 @@ run_test 26b "RAoLU policy when CDT off" test_26c() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -2452,8 +2232,8 @@ run_test 26c "RAoLU effective when file closed" test_26d() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) @@ -2505,11 +2285,10 @@ run_test 27a "Remove the archive of an imported file (Operation not permitted)" test_27b() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local 
f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2524,11 +2303,10 @@ run_test 27b "Remove the archive of a relased file (Operation not permitted)" test_28() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2565,6 +2343,7 @@ run_test 29a "Tests --mntpath and --archive options" test_29b() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) @@ -2582,6 +2361,7 @@ run_test 29b "Archive/delete/remove by FID from the archive." 
test_29c() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local fid1=$(create_small_file $DIR/$tdir/$tfile-1) local fid2=$(create_small_file $DIR/$tdir/$tfile-2) @@ -2619,6 +2399,8 @@ test_29d() { copytool setup -f agt$n -a $n done + mkdir_on_mdt0 $DIR/$tdir + # archive files file=$DIR/$tdir/$tfile fid=$(create_small_file $file) @@ -2706,7 +2488,7 @@ test_30b() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/true local fid=$(copy_file /bin/true $f) chmod 755 $f @@ -2734,7 +2516,7 @@ test_30c() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/SLEEP local slp_sum1=$(md5sum /bin/sleep) local fid=$(copy_file /bin/sleep $f) @@ -2802,6 +2584,7 @@ restore_and_check_size() { test_31a() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir create_archive_file $tdir/$tfile local f=$DIR/$tdir/$tfile @@ -2820,11 +2603,10 @@ run_test 31a "Import a large file and check size during restore" test_31b() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2840,11 +2622,10 @@ run_test 31b "Restore a large unaligned file and check size during restore" test_31c() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 33 1048576) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2858,127 +2639,103 @@ test_31c() { run_test 31c "Restore a large aligned file and check size during restore" test_33() { - # test needs a running copytool - copytool setup -b 1 - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + + mkdir_on_mdt0 $DIR/$tdir + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f - # to be sure wait_all_done will not be mislead by previous tests - # and ops. - cdt_purge - wait_for_grace_delay - # Also raise grace_delay significantly so the Canceled - # Restore action will stay enough long avail. - local old_grace=$(get_hsm_param grace_delay) - stack_trap "set_hsm_param grace_delay $old_grace" EXIT - set_hsm_param grace_delay 100 + # Prevent restore from completing + copytool_suspend + # Implicit restore md5sum $f >/dev/null & local pid=$! - wait_request_state $fid RESTORE STARTED + wait_request_state $fid RESTORE STARTED kill -15 $pid - sleep 1 - - # Check restore trigger process was killed - local killed=$(ps -o pid,comm hp $pid >/dev/null) - $LFS hsm_cancel $f - - # instead of waiting+checking both Restore and Cancel ops - # sequentially, wait for both to be finished and then check - # each results. 
- wait_all_done 100 $fid - local rstate=$(get_request_state $fid RESTORE) - local cstate=$(get_request_state $fid CANCEL) - - if [[ "$rstate" == "CANCELED" ]] ; then - [[ "$cstate" == "SUCCEED" ]] || - error "Restore state is CANCELED and Cancel state " \ - "is not SUCCEED but $cstate" - echo "Restore state is CANCELED, Cancel state is SUCCEED" - elif [[ "$rstate" == "SUCCEED" ]] ; then - [[ "$cstate" == "FAILED" ]] || - error "Restore state is SUCCEED and Cancel state " \ - "is not FAILED but $cstate" - echo "Restore state is SUCCEED, Cancel state is FAILED" - else - error "Restore state is $rstate and Cancel state is $cstate" - fi + copytool_continue - [ -z $killed ] || - error "Cannot kill process waiting for restore ($killed)" + # Check restore trigger process was killed + wait $pid + [ $? -eq 143 ] || error "md5sum was not 'Terminated'" } run_test 33 "Kill a restore waiting process" test_34() { # test needs a running copytool copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - rm $f || error "rm $f failed" # rm must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 rm "$f" || error "rm $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiator still running" wait $pid || error "Restore initiator failed with $?" + + # Check the file was actually deleted + [ !
-f "$f" ] || error "$f was not deleted" } run_test 34 "Remove file during restore" test_35() { # test needs a running copytool copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local f1=$DIR/$tdir/$tfile-1 - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return - + local fid=$(create_empty_file "$f") local fid1=$(copy_file /etc/passwd $f1) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - mv $f1 $f || error "mv $f1 $f failed" # mv must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 mv "$f1" "$f" || error "mv $f1 $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiator still running" wait $pid || error "Restore initiator failed with $?" - fid2=$(path2fid $f) + local fid2=$(path2fid $f) [[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1" } run_test 35 "Overwrite file during restore" @@ -2986,30 +2743,32 @@ run_test 35 "Overwrite file during restore" test_36() { # test needs a running copytool copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$!
- wait_request_state $fid RESTORE STARTED - mv $f $f.new - # rm must not block during restore wait_request_state $fid RESTORE STARTED + # mv must not block during restore + timeout --signal=KILL 10 mv "$f" "$f.new" || + error "mv '$f' '$f.new' failed with rc=$?" + + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || - error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiator is still running" wait $pid || error "Restore initiator failed with $?" } run_test 36 "Move file during restore" @@ -3017,6 +2776,7 @@ run_test 36 "Move file during restore" test_37() { # LU-5683: check that an archived dirty file can be rearchived. copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid @@ -3092,11 +2852,96 @@ test_40() { } run_test 40 "Parallel archive requests" +hsm_archive_batch() { + local files_num=$1 + local batch_max=$2 + local filebase=$3 + local batch_num=0 + local fileset="" + local i=0 + + while [ $i -lt $files_num ]; do + if [ $batch_num -eq $batch_max ]; then + $LFS hsm_archive $fileset || error "HSM archive failed" + # Reset the batch container. + fileset="" + batch_num=0 + fi + + fileset+="${filebase}$i " + batch_num=$(( batch_num + 1 )) + i=$(( i + 1 )) + done + + if [ $batch_num -ne 0 ]; then + $LFS hsm_archive $fileset || error "HSM archive failed" + fileset="" + batch_num=0 + fi +} + +test_50() { + local dir=$DIR/$tdir + local batch_max=50 + + set_hsm_param max_requests 1000000 + mkdir $dir || error "mkdir $dir failed" + df -i $MOUNT + + local start + local elapsed + local files_num + local filebase + + files_num=10000 + filebase="$dir/$tfile.start." + createmany -m $filebase $files_num || + error "createmany -m $filebase failed: $?" 
+ + start=$SECONDS + hsm_archive_batch $files_num $batch_max "$filebase" + elapsed=$((SECONDS - start)) + do_facet $SINGLEMDS "$LCTL get_param -n \ + $HSM_PARAM.actions | grep WAITING | wc -l" + unlinkmany $filebase $files_num || error "unlinkmany $filebase failed" + echo "Start Phase files_num: $files_num time: $elapsed" + + files_num=20000 + filebase="$dir/$tfile.in." + createmany -m $filebase $files_num || + error "createmany -m $filebase failed: $?" + start=$SECONDS + hsm_archive_batch $files_num $batch_max "$filebase" + elapsed=$((SECONDS - start)) + unlinkmany $filebase $files_num || error "unlinkmany $filebase failed" + echo "Middle Phase files_num: $files_num time: $elapsed" + + files_num=10000 + filebase="$dir/$tfile.end." + createmany -m $filebase $files_num || + error "createmany -m $filebase failed: $?" + + start=$SECONDS + hsm_archive_batch $files_num $batch_max "$filebase" + elapsed=$((SECONDS - start)) + do_facet $SINGLEMDS "$LCTL get_param -n \ + $HSM_PARAM.actions | grep WAITING | wc -l" + + unlinkmany $filebase $files_num || error "unlinkmany $filebase failed" + echo "End Phase files_num: $files_num time: $elapsed" + + do_facet $SINGLEMDS "$LCTL get_param -n \ + $HSM_PARAM.actions | grep WAITING | wc -l" + + cdt_purge +} +run_test 50 "Archive with large number of pending HSM actions" + test_52() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) @@ -3121,7 +2966,7 @@ test_53() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) @@ -3144,11 +2989,12 @@ test_53() { run_test 53 "Opened for read file on an evicted client should not be set dirty" test_54() { - # test needs a running copytool - copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 39 1000000) + local
fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3169,11 +3015,12 @@ test_54() { run_test 54 "Write during an archive cancels it" test_55() { - # test needs a running copytool - copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 39 1000000) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3194,13 +3041,12 @@ test_55() { run_test 55 "Truncate during an archive cancels it" test_56() { - # test needs a running copytool - copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3226,7 +3072,7 @@ test_57() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/test_archive_remote # Create a file on a remote node do_node $CLIENT2 "dd if=/dev/urandom of=$f bs=1M "\ @@ -3288,7 +3134,7 @@ test_58() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local sz=$(stat -c %s /etc/passwd) @@ -3305,9 +3151,8 @@ run_test 58 "Truncate a released file will trigger restore" test_59() { local fid - local server_version=$(lustre_version_code $SINGLEMDS) - [[ $server_version -lt $(version_code 2.7.63) ]] && - skip "Need MDS version at least 2.7.63" && return + [[ $MDS1_VERSION -lt $(version_code 2.7.63) ]] && + skip "Need MDS version at least 2.7.63" copytool setup $MCREATE $DIR/$tfile || error "mcreate failed" @@ -3323,16 +3168,15 @@ test_60() { # This test validates the fix for LU-4512. 
Ensure that the -u # option changes the progress reporting interval from the # default (30 seconds) to the user-specified interval. + mkdir_on_mdt0 $DIR/$tdir + + local f=$DIR/$tdir/$tfile + local fid=$(create_file "$f" 1M 10) + local interval=5 local progress_timeout=$((interval * 4)) - copytool setup -b 1 --update-interval $interval - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 10) - [ $? != 0 ] && skip "not enough free space" && return - local mdtidx=0 local mdt=${MDT_PREFIX}${mdtidx} local mds=mds$((mdtidx + 1)) @@ -3361,7 +3205,7 @@ test_60() { local sleep=1 echo -n "Expecting a progress update within $progress_timeout seconds... " - while [ true ]; do + while true; do RESULT=$(do_node $(facet_active_host $mds) "$cmd") if [ -n "$RESULT" ] && [ "$RESULT" -gt 0 ]; then echo "$RESULT bytes copied in $WAIT seconds." @@ -3392,7 +3236,7 @@ test_61() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) cdt_disable @@ -3460,10 +3304,10 @@ test_71() { # Just start and stop the copytool to generate events. cdt_clear_no_retry + mkdir_on_mdt0 $DIR/$tdir + local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_small_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3472,8 +3316,11 @@ test_71() { local expected_fields="event_time data_fid source_fid" expected_fields+=" total_bytes current_bytes" - local START_EVENT - local FINISH_EVENT + local -A events=( + [ARCHIVE_START]=false + [ARCHIVE_FINISH]=false + [ARCHIVE_RUNNING]=false + ) while read event; do # Make sure we're not getting anything from previous events. 
for field in $expected_fields; do @@ -3486,15 +3333,9 @@ test_71() { fi eval $parsed - if [ $event_type == "ARCHIVE_START" ]; then - START_EVENT=$event - continue - elif [ $event_type == "ARCHIVE_FINISH" ]; then - FINISH_EVENT=$event - continue - elif [ $event_type != "ARCHIVE_RUNNING" ]; then - continue - fi + events["$event_type"]=true + + [ "$event_type" != ARCHIVE_RUNNING ] && continue # Do some simple checking of the progress update events. for expected_field in $expected_fields; do @@ -3503,24 +3344,18 @@ test_71() { fi done - if [ $total_bytes -eq 0 ]; then - error "Expected total_bytes to be > 0" - fi + [ $total_bytes -gt 0 ] || error "Expected total_bytes to be > 0" - # These should be identical throughout an archive - # operation. - if [ $source_fid != $data_fid ]; then + # These should be identical throughout an archive operation + [ $source_fid == $data_fid ] || error "Expected source_fid to equal data_fid" - fi done < <(echo $"$(get_copytool_event_log)") - if [ -z "$START_EVENT" ]; then - error "Copytool failed to send archive start event to FIFO" - fi - - if [ -z "$FINISH_EVENT" ]; then - error "Copytool failed to send archive finish event to FIFO" - fi + # Check we received every type of events we were expecting + for event in "${!events[@]}"; do + ${events["$event"]} || + error "Copytool failed to send '$event' event to FIFO" + done echo "Archive events look OK." 
} @@ -3543,7 +3378,7 @@ test_72() { error "cannot create $test_file on $SINGLEAGT" copy2archive $test_file $tdir/$tfile - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile copytool import $tdir/$tfile $f f=$DIR2/$tdir/$tfile @@ -3625,7 +3460,7 @@ run_test 72 "Copytool logs JSON restore events to FIFO" test_90() { file_count=51 # Max number of files constrained by LNET message size - mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" + mkdir_on_mdt0 $DIR/$tdir || error "mkdir $DIR/$tdir failed" local f=$DIR/$tdir/$tfile local FILELIST=/tmp/filelist.txt local i="" @@ -3642,12 +3477,12 @@ test_90() { wait_for_grace_delay $LFS hsm_archive --filelist $FILELIST || error "cannot archive a file list" - wait_all_done 100 + wait_all_done 200 $LFS hsm_release --filelist $FILELIST || error "cannot release a file list" $LFS hsm_restore --filelist $FILELIST || error "cannot restore a file list" - wait_all_done 100 + wait_all_done 200 } run_test 90 "Archive/restore a file list" @@ -3712,13 +3547,73 @@ test_103() { } run_test 103 "Purge all requests" +test_103a() { + (( MDS1_VERSION >= $(version_code 2.14.56) )) || + skip "Need MDS version at least 2.14.56" + + cdt_clear_non_blocking_restore + + # test needs a running copytool + copytool setup + + local -a fids=() + local i + local rpcs_inflight=$($LCTL get_param -n \ + "mdc.$(facet_svc mds1)*.max_rpcs_in_flight" | + head -n1) + + mkdir_on_mdt0 $DIR/$tdir + for ((i=0; i < rpcs_inflight; i++)); do + fids+=( $(copy_file /etc/passwd $DIR/$tdir/${tfile}_$i) ) + done + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/* + + local time=0 + local cnt=0 + local grep_regex="($(tr ' ' '|' <<< "${fids[*]}")).*action=ARCHIVE.*status=SUCCEED" + echo $grep_regex + while [[ $time -lt 5 ]] && [[ $cnt -ne ${#fids[@]} ]]; do + cnt=$(do_facet mds1 "$LCTL get_param -n $HSM_PARAM.actions | + grep -c -E '$grep_regex'") + sleep 1 + ((++time)) + done + [[ $cnt -eq ${#fids[@]} ]] || error "Fail to archive files 
$cnt/${#fids[@]}" + + $LFS hsm_release $DIR/$tdir/* + + kill_copytools + wait_copytools || error "Copytool failed to stop" + + local -a pids=() + for i in "${fids[@]}"; do + cat $DIR/.lustre/fid/$i > /dev/null & pids+=($!) + done + + cdt_purge + grep_regex="($(tr ' ' '|' <<< "${fids[*]}")).*action=RESTORE.*status=CANCELED" + cnt=$(do_facet mds1 "$LCTL get_param -n $HSM_PARAM.actions | + grep -cE '$grep_regex'") + + [[ "$cnt" -eq ${#fids[@]} ]] || + error "Some request have not been canceled ($cnt/${#fids[@]} canceled)" + + # cat cmds should not hang and should fail + for i in "${!pids[@]}"; do + wait ${pids[$i]} && + error "Restore for ${tfile}_$i (${pids[$i]}) should fail" || + true + done +} +run_test 103a "Purge pending restore requests" + DATA=CEA DATAHEX='[434541]' test_104() { + mkdir_on_mdt0 $DIR/$tdir + local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\ @@ -3728,16 +3623,13 @@ test_104() { [[ "$data1" == "$DATAHEX" ]] || error "Data field in records is ($data1) and not ($DATAHEX)" - # archive the file - copytool setup - - wait_request_state $fid ARCHIVE SUCCEED + cdt_purge } run_test 104 "Copy tool data field" test_105() { local max_requests=$(get_hsm_param max_requests) - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local i="" stack_trap "set_hsm_param max_requests $max_requests" EXIT @@ -3792,7 +3684,7 @@ test_107() { # test needs a running copytool copytool setup # create and archive file - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f1=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f1) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1 @@ -3847,7 +3739,7 @@ test_110a() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir copy2archive /etc/passwd 
$tdir/$tfile @@ -3874,7 +3766,7 @@ test_110b() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -3900,7 +3792,7 @@ test_111a() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile @@ -3929,7 +3821,7 @@ test_111b() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) stack_trap cdt_clear_no_retry EXIT @@ -3954,7 +3846,7 @@ test_112() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) cdt_disable @@ -3971,23 +3863,65 @@ test_112() { } run_test 112 "State of recorded request" +test_113() { + mkdir_on_mdt0 $DIR/$tdir + + local file1=$DIR/$tdir/$tfile + local file2=$DIR2/$tdir/$tfile + + local fid=$(create_small_sync_file $file1) + + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT + zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" || + error "cannot mount '$MOUNT3' on '$SINGLEAGT'" + + copytool setup -m "$MOUNT3" + + do_nodes $(comma_list $(nodes_list)) $LCTL clear + + $LFS hsm_archive $file1 || error "Fail to archive $file1" + wait_request_state $fid ARCHIVE SUCCEED + + $LFS hsm_release $file1 + echo "Verifying released state: " + check_hsm_flags $file1 "0x0000000d" + + multiop_bg_pause $file1 oO_WRONLY:O_APPEND:_w4c || error "multiop failed" + MULTIPID=$! 
+ stat $file2 & + kill -USR1 $MULTIPID + + wait + sync + + local size1=$(stat -c "%s" $file1) + local size2=$(stat -c "%s" $file2) + + [ $size1 -eq $size2 ] || error "sizes are different $size1 $size2" +} +run_test 113 "wrong stat after restore" + test_200() { - # test needs a running copytool - copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup + + # Prevent archive from completing + copytool_suspend - # test with cdt on is made in test_221 - cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f # wait archive to register at CDT - wait_request_state $fid ARCHIVE WAITING - $LFS hsm_cancel $f - cdt_enable + wait_request_state $fid ARCHIVE STARTED + + # Cancel the archive + $LFS hsm_cancel "$f" + wait_request_state $fid ARCHIVE CANCELED + + copytool_continue wait_request_state $fid CANCEL SUCCEED } run_test 200 "Register/Cancel archive" @@ -3995,6 +3929,7 @@ run_test 200 "Register/Cancel archive" test_201() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile create_archive_file $tdir/$tfile @@ -4014,23 +3949,23 @@ test_201() { run_test 201 "Register/Cancel restore" test_202() { - # test needs a running copytool - copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + # test needs a running copytool + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - cdt_disable + copytool_suspend $LFS hsm_remove $f # wait remove to register at CDT - wait_request_state $fid REMOVE WAITING + wait_request_state $fid REMOVE STARTED $LFS hsm_cancel $f - cdt_enable + wait_request_state $fid REMOVE CANCELED } run_test 202 "Register/Cancel remove" @@ -4039,7 +3974,7 @@ test_220A() { # was test_220 # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -4058,7 +3993,7 @@ test_220a() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -4087,20 +4022,23 @@ test_220a() { run_test 220a "Changelog for failed archive" test_221() { - # test needs a running copytool - copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + copytool setup -b 1 changelog_register + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED + $LFS hsm_cancel $f wait_request_state $fid ARCHIVE CANCELED + + copytool_continue wait_request_state $fid CANCEL SUCCEED changelog_find -type HSM -target-fid $fid -flags 0x7d || @@ -4112,7 +4050,7 @@ test_222a() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile @@ -4133,7 +4071,7 @@ test_222b() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -4155,7 +4093,7 @@ test_222c() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile @@ -4189,7 +4127,7 @@ test_222d() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -4212,6 +4150,7 @@ run_test 222d "Changelog for failed implicit restore" test_223a() { # test needs a running copytool copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile create_archive_file $tdir/$tfile @@ -4233,22 +4172,27 @@ test_223a() { run_test 223a "Changelog for restore canceled (import case)" test_223b() { - # test needs a running copytool - copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + copytool setup -b 1 changelog_register + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + + # Prevent restore from completing + copytool_suspend $LFS hsm_restore $f wait_request_state $fid RESTORE STARTED + $LFS hsm_cancel $f wait_request_state $fid RESTORE CANCELED + + copytool_continue wait_request_state $fid CANCEL SUCCEED changelog_find -type HSM -target-fid $fid -flags 0xfd || @@ -4260,7 +4204,7 @@ test_224A() { # was test_224 # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -4281,7 +4225,7 @@ test_224a() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) @@ -4321,22 +4265,23 @@ test_225() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - # if cdt is on, it can serve too quickly the request - cdt_disable + # Prevent remove from completing + copytool_suspend $LFS hsm_remove $f + $LFS hsm_cancel $f - cdt_enable wait_request_state $fid REMOVE CANCELED + + copytool_continue wait_request_state $fid CANCEL SUCCEED changelog_find -type HSM -target-fid $fid -flags 0x27d @@ -4348,7 +4293,7 @@ test_226() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f1=$DIR/$tdir/$tfile-1 local f2=$DIR/$tdir/$tfile-2 @@ -4423,6 +4368,7 @@ run_test 227 "changelog when explicit setting of HSM flags" test_228() { # test needs a running copytool copytool setup + mkdir_on_mdt0 $DIR/$tdir local fid=$(create_small_sync_file $DIR/$tfile) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile @@ -4456,67 +4402,93 @@ test_228() { run_test 228 "On released file, return extend to FIEMAP.
For [cp,tar] --sparse" test_250() { - # test needs a running copytool + local file="$DIR/$tdir/$tfile" + + # set max_requests to allow one request of each type to be started (3) + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + # speed up test + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + + # send 1 requests of each kind twice copytool setup - mkdir -p $DIR/$tdir - local maxrequest=$(get_hsm_param max_requests) - local rqcnt=$(($maxrequest * 3)) - local i="" + mkdir_on_mdt0 $DIR/$tdir - cdt_disable - for i in $(seq -w 1 $rqcnt); do - rm -f $DIR/$tdir/$i - dd if=/dev/urandom of=$DIR/$tdir/$i bs=1M count=10 conv=fsync + # setup the files + for action in archive restore remove; do + local filepath="$file"-to-$action + local fid=$(create_empty_file "$filepath") + local fid2=$(create_empty_file "$filepath".bis) + + if [ "$action" != archive ]; then + "$LFS" hsm_archive "$filepath" + wait_request_state $fid ARCHIVE SUCCEED + "$LFS" hsm_archive "$filepath".bis + wait_request_state $fid2 ARCHIVE SUCCEED + fi + if [ "$action" == restore ]; then + "$LFS" hsm_release "$filepath" + "$LFS" hsm_release "$filepath".bis + fi done - # we do it in 2 steps, so all requests arrive at the same time - for i in $(seq -w 1 $rqcnt); do - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$i + + # suspend the copytool to prevent requests from completing + stack_trap "copytool_continue" EXIT + copytool_suspend + + # send `max_requests' requests (one of each kind) + for action in archive restore remove; do + filepath="$file"-to-$action + "$LFS" hsm_${action} "$filepath" + wait_request_state $(path2fid "$filepath") "${action^^}" STARTED done - cdt_enable - local cnt=$rqcnt - local wt=$rqcnt - while [[ $cnt != 0 || $wt != 0 ]]; do - sleep 1 - cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.actions |\ - grep STARTED | grep -v CANCEL | wc -l") - [[ 
$cnt -le $maxrequest ]] || - error "$cnt > $maxrequest too many started requests" - wt=$(do_facet $SINGLEMDS "$LCTL get_param\ - $HSM_PARAM.actions |\ - grep WAITING | wc -l") - echo "max=$maxrequest started=$cnt waiting=$wt" + + # send another batch of requests + for action in archive restore remove; do + "$LFS" hsm_${action} "$file-to-$action".bis done + # wait for `loop_period' seconds to make sure the coordinator has time + # to register those, even though it should not + sleep 1 + + # only the first batch of request should be started + local -i count + count=$(do_facet $SINGLEMDS "$LCTL" get_param -n $HSM_PARAM.actions | + grep -c STARTED) + + ((count == 3)) || + error "expected 3 STARTED requests, found $count" } run_test 250 "Coordinator max request" test_251() { - # test needs a running copytool - copytool setup -b 1 + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") cdt_disable # to have a short test local old_to=$(get_hsm_param active_request_timeout) - set_hsm_param active_request_timeout 4 + set_hsm_param active_request_timeout 1 # to be sure the cdt will wake up frequently so # it will be able to cancel the "old" request local old_loop=$(get_hsm_param loop_period) - set_hsm_param loop_period 2 + set_hsm_param loop_period 1 cdt_enable - # clear locks to avoid extra delay caused by flush/cancel - # and thus prevent early copytool death to timeout. 
- cancel_lru_locks osc + copytool setup + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED - sleep 5 + + # Let the request timeout wait_request_state $fid ARCHIVE CANCELED set_hsm_param active_request_timeout $old_to @@ -4525,7 +4497,8 @@ test_251() { run_test 251 "Coordinator request timeout" test_252() { - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir + local f=$DIR/$tdir/$tfile local fid=$(create_empty_file "$f") @@ -4534,11 +4507,12 @@ test_252() { set_hsm_param loop_period 1 copytool setup - copytool_suspend + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED - rm -f $f + rm -f "$f" stack_trap "set_hsm_param active_request_timeout \ $(get_hsm_param active_request_timeout)" EXIT @@ -4554,7 +4528,7 @@ test_253() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile dd if=/dev/zero of=$f bs=1MB count=10 @@ -4585,8 +4559,8 @@ run_test 253 "Check for wrong file size after release" test_254a() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.56) ] && - skip "need MDS version at least 2.10.56" && return + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" # Check that the counters are initialized to 0 local count @@ -4603,8 +4577,8 @@ run_test 254a "Request counters are initialized to zero" test_254b() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.56) ] && - skip "need MDS version at least 2.10.56" && return + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" # The number of request to launch (at least 32) local request_count=$((RANDOM % 32 + 32)) @@ -4618,6 +4592,8 @@ test_254b() "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT set_hsm_param max_requests "$request_count" + 
mkdir_on_mdt0 $DIR/$tdir + local timeout local count for request_type in archive restore remove; do @@ -4673,6 +4649,309 @@ test_254b() } run_test 254b "Request counters are correctly incremented and decremented" +test_255() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + mkdir_on_mdt0 $DIR/$tdir + + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + + # How do you make sure the coordinator has consumed any outstanding + # event, without triggering an event yourself? + # + # You wait for a request to disappear from the coordinator's llog. + + # Warning: the setup represents 90% of this test + + # Create and process an HSM request + copytool setup + "$LFS" hsm_archive "$file" + wait_request_state $fid ARCHIVE SUCCEED + + kill_copytools + wait_copytools || error "failed to stop copytools" + + # Launch a new HSM request + rm "$file" + create_empty_file "$file" + "$LFS" hsm_archive "$file" + + cdt_shutdown + + # Have the completed request be removed as soon as the cdt wakes up + stack_trap "set_hsm_param grace_delay $(get_hsm_param grace_delay)" EXIT + set_hsm_param grace_delay 1 + # (Hopefully, time on the MDS will behave nicely) + do_facet $SINGLEMDS sleep 2 & + + # Increase `loop_period' as a mean to prevent the coordinator from + # waking itself up to do some housekeeping. + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + + wait $! || error "waiting failed" + cdt_enable + wait_request_state $fid ARCHIVE "" + # The coordinator will not wake up on its own for ~`loop_period' secs... + + # ... Unless a copytool registers. Now the real test begins + copytool setup + wait_request_state $(path2fid "$file") ARCHIVE SUCCEED +} +run_test 255 "Copytool registration wakes the coordinator up" + +# tests 260[a-c] rely on the parsing of the copytool's log file, they might +# break in the future because of that. 
+test_260a() +{ + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + mkdir_on_mdt0 $DIR/$tdir + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup + "$LFS" hsm_archive "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Send several archive requests + for file in "${files[@]:1}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Launch a copytool + copytool setup + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:1}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && return + done + + error "Too many ARCHIVE requests were run before the RESTORE request" +} +run_test 260a "Restore request have priority over other requests" + +# This test is very much tied to the implementation of the current priorisation +# mechanism in the coordinator. 
It might not make sense to keep it in the future +test_260b() +{ + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + mkdir_on_mdt0 $DIR/$tdir + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Send several archive requests + for file in "${files[@]:1}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Launch a copytool + copytool setup + copytool setup --archive-id 2 + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:1}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && return + done + + error "Too many ARCHIVE requests were run before the RESTORE request" +} +run_test 260b "Restore request have priority over other requests" + +# This test is very much tied to the implementation of the current priorisation +# mechanism in the coordinator. 
It might not make sense to keep it in the future +test_260c() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + mkdir_on_mdt0 $DIR/$tdir + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Force the next coordinator run to do housekeeping + cdt_shutdown + cdt_enable + + "$LFS" hsm_archive "${files[1]}" + + # Launch a copytool + copytool setup + copytool setup --archive-id 2 + + wait_request_state "$(path2fid "${files[1]}")" ARCHIVE SUCCEED + # The coordinator just did a housekeeping run it won't do another one + # for around `loop_period' seconds => requests will not be reordered + # if it costs too much (ie. 
when the coordinator has to discard a whole + # hal) + + # Send several archive requests + for file in "${files[@]:2}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:2}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && + error "Restore requests should not be prioritised" \ + "unless the coordinator is doing housekeeping" + done + return 0 +} +run_test 260c "Requests are not reordered on the 'hot' path of the coordinator" + +test_261() { + local file=$DIR/$tdir/$tfile + local size + local fid + + copytool setup + mkdir_on_mdt0 $DIR/$tdir || error "mkdir $DIR/$tdir failed" + + dd if=/dev/zero of=$file bs=4k count=2 || error "Write $file failed" + fid=$(path2fid $file) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $file + wait_request_state $fid ARCHIVE SUCCEED + + $LFS hsm_state $file + $LFS hsm_release $file + $LFS hsm_restore $file + wait_request_state $fid RESTORE SUCCEED + $LFS hsm_release $file + size=$(stat -c %s $file) + [[ $size == 8192 ]] || error "Size after HSM release: $size" + + $LFS hsm_release $file + $LFS hsm_restore $file + $LFS hsm_release $file + size=$(stat -c %s $file) + [[ $size == 8192 ]] || error "Size after HSM release: $size" + $LFS hsm_state $file +} +run_test 261 "Report 0 bytes size after HSM release" + test_300() { [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return @@ -4752,7 +5031,7 @@ test_400() { copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local dir_mdt0=$DIR/$tdir/mdt0 local 
dir_mdt1=$DIR/$tdir/mdt1 @@ -4784,7 +5063,7 @@ test_401() { copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local dir_mdt0=$DIR/$tdir/mdt0 local dir_mdt1=$DIR/$tdir/mdt1 @@ -4842,7 +5121,7 @@ run_test 402a "Copytool start fails if all MDTs are inactive" test_402b() { copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile touch $f || error "touch $f failed" @@ -4895,7 +5174,7 @@ test_404() { copytool setup # create files on both MDT0000 and MDT0001 - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local dir_mdt0=$DIR/$tdir/mdt0 stack_trap "rm -rf $dir_mdt0" EXIT @@ -4924,7 +5203,7 @@ test_405() { copytool setup - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local striped_dir=$DIR/$tdir/striped_dir @@ -4970,12 +5249,14 @@ run_test 405 "archive and release under striped directory" test_406() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] && - skip "need MDS version at least 2.7.64" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.64) ] && + skip "need MDS version at least 2.7.64" local fid local mdt_index + mkdir_on_mdt0 $DIR/$tdir + fid=$(create_small_file $DIR/$tdir/$tfile) echo "old fid $fid" @@ -5026,17 +5307,13 @@ test_406() { run_test 406 "attempting to migrate HSM archived files is safe" test_407() { - needclients 2 || return 0 - # test needs a running copytool - copytool setup - - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local f2=$DIR2/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -5045,12 +5322,16 @@ test_407() { #define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x164 + # Prevent restore from completing + copytool_suspend + md5sum $f & # 1st request holds layout lock while appropriate # RESTORE record is still not added to llog md5sum $f2 & sleep 2 + do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions" # after umount hsm_actions->O/x/x log shouldn't have # double RESTORE records like below #[0x200000401:0x1:0x0]...0x58d03a0d/0x58d03a0c action=RESTORE...WAITING @@ -5058,24 +5339,37 @@ test_407() { sleep 30 && do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"& fail $SINGLEMDS + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - wait_request_state $fid RESTORE SUCCEED + do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions" + + copytool_continue + wait_all_done 100 $fid } run_test 407 "Check for double RESTORE records in llog" test_500() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.92) ] && - skip "HSM migrate is not supported" && return + [ "$MDS1_VERSION" -lt $(version_code 2.6.92) ] && + skip "HSM migrate is not supported" test_mkdir -p $DIR/$tdir - llapi_hsm_test -d $DIR/$tdir || error "One llapi HSM test failed" + + if [ "$CLIENT_VERSION" -lt $(version_code 2.11.56) ] || + [ "$MDS1_VERSION" -lt $(version_code 2.11.56) ]; + then + llapi_hsm_test -d $DIR/$tdir -b || + error "One llapi HSM test failed" + else + llapi_hsm_test -d $DIR/$tdir || + error "One llapi HSM test failed" + fi } run_test 500 "various LLAPI HSM tests" test_600() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] && - skip "need MDS version at least 2.10.58" && return 0 + [ "$MDS1_VERSION" -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" mkdir -p 
$DIR/$tdir @@ -5107,8 +5401,8 @@ test_600() { run_test 600 "Changelog fields 'u=' and 'nid='" test_601() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] && - skip "need MDS version at least 2.10.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" mkdir -p $DIR/$tdir @@ -5130,8 +5424,11 @@ test_601() { run_test 601 "OPEN Changelog entry" test_602() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] && - skip "need MDS version at least 2.10.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + stack_trap "restore_opencache" EXIT + disable_opencache mkdir -p $DIR/$tdir @@ -5177,8 +5474,8 @@ test_602() { run_test 602 "Changelog record CLOSE only if open+write or OPEN recorded" test_603() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] && - skip "need MDS version at least 2.10.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" mkdir -p $DIR/$tdir @@ -5202,10 +5499,10 @@ test_603() { run_test 603 "GETXATTR Changelog entry" test_604() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] && - skip "need MDS version at least 2.10.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile local f2=$DIR2/$tdir/$tfile @@ -5278,8 +5575,11 @@ test_604() { run_test 604 "NOPEN Changelog entry" test_605() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] && - skip "need MDS version at least 2.10.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + stack_trap "restore_opencache" EXIT + disable_opencache mkdir -p $DIR/$tdir @@ -5360,16 +5660,15 @@ test_605() { run_test 605 "Test OPEN and CLOSE rate limit in Changelogs" 
test_606() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] && - skip "need MDS version at least 2.10.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" local llog_reader=$(do_facet mgs "which llog_reader 2> /dev/null") llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader} [ -z $(do_facet mgs ls -d $llog_reader 2> /dev/null) ] && - skip_env "missing llog_reader" && return - local fstype=$(facet_fstype mds1) + skip_env "missing llog_reader" - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -5387,8 +5686,9 @@ test_606() { local entry #remount mds1 as ldiskfs or zfs type - stack_trap "stop mds1; start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS" EXIT stop mds1 || error "stop mds1 failed" + stack_trap "unmount_fstype mds1; start mds1 $(mdsdevname 1)\ + $MDS_MOUNT_OPTS" EXIT mount_fstype mds1 || error "remount mds1 failed" for ((i = 0; i < 1; i++)); do @@ -5414,7 +5714,7 @@ test_606() { [ -n "$nid" ] || error "nid is empty" echo "Got NID $nid" [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || - error "nid '$nid' does not match any NID ${CLIENT_NIDS[@]}" + error "nid '$nid' does not match any NID ${CLIENT_NIDS[*]}" } run_test 606 "llog_reader groks changelog fields"