X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=aba7228de5b867a5d41555334861c81955f83d59;hb=5b0469613c44a5423233a054d949dcd71c75a3c4;hp=0126b78addc9626e3e69c1ec941b02abefa5e5aa;hpb=7a6b48c2f97f165b4449f6283e313cfa33aea5a1;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 0126b78..aba7228 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -24,6 +24,7 @@ init_logging MULTIOP=${MULTIOP:-multiop} OPENFILE=${OPENFILE:-openfile} +MMAP_CAT=${MMAP_CAT:-mmap_cat} MOUNT_2=${MOUNT_2:-"yes"} FAIL_ON_ERROR=false @@ -102,7 +103,16 @@ init_agt_vars() { export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} export HSMTOOL_TESTDIR export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") + # $hsm_root/$HSMTMP Makes $hsm_root dir path less generic to ensure + # rm -rf $hsm_root/* is safe even if $hsm_root becomes unset to avoid + # deleting everything in filesystem, independent of any copytool. + export HSMTMP=${HSMTMP:-"shsm"} + HSM_ARCHIVE=$(copytool_device $SINGLEAGT) + + [ -z "${HSM_ARCHIVE// /}" ] && error "HSM_ARCHIVE is empty!" + HSM_ARCHIVE=$HSM_ARCHIVE/$HSMTMP + HSM_ARCHIVE_NUMBER=2 # The test only support up to 10 MDTs @@ -144,15 +154,40 @@ get_mdt_devices() { } search_copytools() { - local agents=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $agents "pgrep -x $HSMTOOL_BASE" + local hosts=${1:-$(facet_active_host $SINGLEAGT)} + do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" } -search_and_kill_copytool() { - local agents=${1:-$(facet_active_host $SINGLEAGT)} +kill_copytools() { + local hosts=${1:-$(facet_active_host $SINGLEAGT)} + + echo "Killing existing copytools on $hosts" + do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true +} + +wait_copytools() { + local hosts=${1:-$(facet_active_host $SINGLEAGT)} + local wait_timeout=200 + local wait_start=$SECONDS + local wait_end=$((wait_start + wait_timeout)) + + while ((SECONDS < wait_end)); do + sleep 2 + if ! search_copytools $hosts; then + echo "copytools stopped in $((SECONDS - wait_start))s" + return 0 + fi + + echo "copytools still running on $hosts" + done + + # try to dump Copytool's stack + do_nodesv $hosts "echo 1 >/proc/sys/kernel/sysrq ; " \ + "echo t >/proc/sysrq-trigger" - echo "Killing existing copytools on $agents" - do_nodesv $agents "killall -q $HSMTOOL_BASE" || true + echo "copytools failed to stop in ${wait_timeout}s" + + return 1 } copytool_monitor_setup() { @@ -218,6 +253,9 @@ copytool_setup() { local lustre_mntpnt=${2:-${MOUNT2:-$MOUNT}} local arc_id=$3 local hsm_root=${4:-$(copytool_device $facet)} + + [ -z "${hsm_root// /}" ] && error "copytool_setup: hsm_root empty!" + local agent=$(facet_active_host $facet) if [[ -z "$arc_id" ]] && @@ -228,14 +266,16 @@ copytool_setup() { if $HSM_ARCHIVE_PURGE; then echo "Purging archive on $agent" - do_facet $facet "rm -rf $hsm_root/*" + do_facet $facet "rm -rf $hsm_root/$HSMTMP/*" fi echo "Starting copytool $facet on $agent" - do_facet $facet "mkdir -p $hsm_root" || error "mkdir '$hsm_root' failed" + do_facet $facet "mkdir -p $hsm_root/$HSMTMP/" || + error "mkdir '$hsm_root/$HSMTMP' failed" # bandwidth is limited to 1MB/s so the copy time is known and # independent of hardware - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root $hsm_root" + local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon" + cmd+=" --hsm-root $hsm_root/$HSMTMP" [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id" [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] || cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL" @@ -272,54 +312,61 @@ get_copytool_event_log() { copytool_cleanup() { trap - EXIT - local facet=$SINGLEAGT - local agents=${1:-$(facet_active_host $facet)} - local mdtno - local idx - local oldstate - local mdt_hsmctrl - local hsm_root=$(copytool_device $facet) - local end_wait=$(( SECONDS + TIMEOUT )) + local agt_facet=$SINGLEAGT + local agt_hosts=${1:-$(facet_active_host $agt_facet)} + local hsm_root=$(copytool_device $agt_facet) - do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0 + [ -z "${hsm_root// /}" ] && error "copytool_cleanup: hsm_root empty!" - while (( SECONDS < end_wait )); do - sleep 2 - do_nodesv $agents "pgrep -x $HSMTOOL_BASE" - if [ $? -ne 0 ]; then - echo "Copytool is stopped on $agents" - break - fi - echo "Copytool still running on $agents" + local i + local facet + local param + local -a state + + kill_copytools $agt_hosts + wait_copytools $agt_hosts || error "copytools failed to stop" + + # Clean all CDTs orphans requests from previous tests that + # would otherwise need to timeout to clear. + for ((i = 0; i < MDSCOUNT; i++)); do + facet=mds$((i + 1)) + param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) + state[$i]=$(do_facet $facet "$LCTL get_param -n $param") + + # Skip already stopping or stopped CDTs. + [[ "${state[$i]}" =~ ^stop ]] && continue + + do_facet $facet "$LCTL set_param $param=shutdown" done - if do_nodesv $agents "pgrep -x $HSMTOOL_BASE"; then - error "Copytool failed to stop in ${TIMEOUT}s ..." - else - echo "Copytool has stopped in " \ - "$((TIMEOUT - (end_wait - SECONDS)))s." - fi - # clean all CDTs orphans requests from previous tests - # that would otherwise need to timeout to clear. - for mdtno in $(seq 1 $MDSCOUNT); do - idx=$(($mdtno - 1)) - mdt_hsmctrl="mdt.$FSNAME-MDT000${idx}.hsm_control" - oldstate=$(do_facet mds${mdtno} "$LCTL get_param -n " \ - "$mdt_hsmctrl") - # skip already stop[ed,ing] CDTs - echo $oldstate | grep stop && continue - - do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=shutdown" - wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \ - "stopped" 20 || - error "mds${mdtno} cdt state is not stopped" - do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=$oldstate" - wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \ - "$oldstate" 20 || - error "mds${mdtno} cdt state is not $oldstate" + for ((i = 0; i < MDSCOUNT; i++)); do + # Only check and restore CDTs that we stopped in the first loop. + [[ "${state[$i]}" =~ ^stop ]] && continue + + facet=mds$((i + 1)) + param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) + + wait_result $facet "$LCTL get_param -n $param" stopped 20 || + error "$facet CDT state is not stopped" + + # Restore old CDT state. + do_facet $facet "$LCTL set_param $param=${state[$i]}" + done + + for ((i = 0; i < MDSCOUNT; i++)); do + # Only check CDTs that we stopped in the first loop. + [[ "${state[$i]}" =~ ^stop ]] && continue + + facet=mds$((i + 1)) + param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) + + # Check that the old CDT state was restored. + wait_result $facet "$LCTL get_param -n $param" "${state[$i]}" \ + 20 || error "$facet CDT state is not '${state[$i]}'" done - if do_facet $facet "df $hsm_root" >/dev/null 2>&1 ; then - do_facet $facet "rm -rf $hsm_root/*" + + if do_facet $agt_facet "df $hsm_root" >/dev/null 2>&1 ; then + do_facet $agt_facet "rm -rf $hsm_root/$HSMTMP/*" fi } @@ -634,66 +681,19 @@ check_enough_free_space() { return 0 } -make_large_for_striping() { - local file2=${1/$DIR/$DIR2} - local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1) - - cleanup_large_files - - check_enough_free_space 5 $sz - [ $? != 0 ] && return $? - - dd if=/dev/urandom of=$file2 count=5 bs=$sz conv=fsync || - file_creation_failure dd $file2 $? - - path2fid $1 || error "cannot get fid on $1" -} - -make_large_for_progress() { +make_custom_file_for_progress() { local file2=${1/$DIR/$DIR2} + local fsize=${2:-"39"} + local blksz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1) + blksz=${3:-$blksz} - cleanup_large_files - - check_enough_free_space 39 1000000 - [ $? != 0 ] && return $? - - # big file is large enough, so copy time is > 30s - # so copytool make 1 progress - # size is not a multiple of 1M to avoid stripe - # aligment - dd if=/dev/urandom of=$file2 count=39 bs=1000000 conv=fsync || - file_creation_failure dd $file2 $? - - path2fid $1 || error "cannot get fid on $1" -} - -make_large_for_progress_aligned() { - local file2=${1/$DIR/$DIR2} - - cleanup_large_files - - check_enough_free_space 33 1048576 - [ $? != 0 ] && return $? - - # big file is large enough, so copy time is > 30s - # so copytool make 1 progress - # size is a multiple of 1M to have stripe - # aligment - dd if=/dev/urandom of=$file2 count=33 bs=1M conv=fsync || - file_creation_failure dd $file2 $? - path2fid $1 || error "cannot get fid on $1" -} - -make_large_for_cancel() { - local file2=${1/$DIR/$DIR2} + [[ $fsize -gt 0 ]] || error "Invalid file size" + [[ $blksz -gt 0 ]] || error "Invalid stripe size" cleanup_large_files - - check_enough_free_space 103 1048576 + check_enough_free_space $fsize $blksz [ $? != 0 ] && return $? - - # Copy timeout is 100s. 105MB => 105s - dd if=/dev/urandom of=$file2 count=103 bs=1M conv=fsync || + dd if=/dev/zero of=$file2 count=$fsize bs=$blksz conv=fsync || file_creation_failure dd $file2 $? path2fid $1 || error "cannot get fid on $1" } @@ -764,14 +764,74 @@ parse_json_event() { echo $raw_event | python -c "$json_parser" } -# populate MDT device array -get_mdt_devices +get_agent_by_uuid_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + do_facet $mds "$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.agents |\ + grep $uuid" +} + +check_agent_registered_by_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) + if [[ ! -z "$agent" ]]; then + echo "found agent $agent on $mds" + else + error "uuid $uuid not found in agent list on $mds" + fi +} + +check_agent_unregistered_by_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) + if [[ -z "$agent" ]]; then + echo "uuid not found in agent list on $mds" + else + error "uuid found in agent list on $mds: $agent" + fi +} + +check_agent_registered() { + local uuid=$1 + local mdsno + for mdsno in $(seq 1 $MDSCOUNT); do + check_agent_registered_by_mdt $uuid $((mdsno - 1)) + done +} + +check_agent_unregistered() { + local uuid=$1 + local mdsno + for mdsno in $(seq 1 $MDSCOUNT); do + check_agent_unregistered_by_mdt $uuid $((mdsno - 1)) + done +} + +get_agent_uuid() { + local agent=${1:-$(facet_active_host $SINGLEAGT)} + + # Lustre mount-point is mandatory and last parameter on + # copytool cmd-line. + local mntpnt=$(do_rpc_nodes $agent ps -C $HSMTOOL_BASE -o args= | + awk '{print $NF}') + [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ + "parameter" + do_rpc_nodes $agent get_client_uuid $mntpnt | cut -d' ' -f2 +} # initiate variables init_agt_vars +# populate MDT device array +get_mdt_devices + # cleanup from previous bad setup -search_and_kill_copytool +kill_copytools # for recovery tests, coordinator needs to be started at mount # so force it @@ -824,6 +884,23 @@ test_1() { } run_test 1 "lfs hsm flags root/non-root access" +test_1a() { + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(make_small $f) + + $LFS hsm_archive $f || error "could not archive file" + wait_request_state $fid ARCHIVE SUCCEED + + # Release and check states + $LFS hsm_release $f || error "could not release file" + echo -n "Verifying released state: " + check_hsm_flags $f "0x0000000d" + + $MMAP_CAT $f > /dev/null || error "failed mmap & cat release file" +} +run_test 1a "mmap & cat a HSM released file" + test_2() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -935,13 +1012,19 @@ test_8() { run_test 8 "Test default archive number" test_9() { - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/passwd $f) # we do not use the default one to be sure local new_an=$((HSM_ARCHIVE_NUMBER + 1)) copytool_cleanup copytool_setup $SINGLEAGT $MOUNT $new_an + + # give time for CT to register with MDTs + sleep $(($MDSCOUNT*2)) + local uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) + check_agent_registered $uuid + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/passwd $f) $LFS hsm_archive --archive $new_an $f wait_request_state $fid ARCHIVE SUCCEED @@ -1175,7 +1258,7 @@ test_12c() { local f=$DIR/$tdir/$tfile $LFS setstripe -c 2 $f local fid - fid=$(make_large_for_striping $f) + fid=$(make_custom_file_for_progress $f 5) [ $? != 0 ] && skip "not enough free space" && return local FILE_CRC=$(md5sum $f) @@ -1431,6 +1514,8 @@ cleanup_test_12q() { } test_12q() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.58) ] && + skip "need MDS version at least 2.7.58" && return 0 zconf_mount $(facet_host $SINGLEAGT) $MOUNT3 || error "cannot mount $MOUNT3 on $SINGLEAGT" @@ -1452,9 +1537,8 @@ test_12q() { $LFS hsm_release $f || error "could not release file" check_hsm_flags $f "0x0000000d" - search_and_kill_copytool - sleep 5 - search_copytools && error "Copytool should have stopped" + kill_copytools + wait_copytools || error "copytool failed to stop" cat $f > /dev/null & @@ -1639,7 +1723,8 @@ test_16() { $LFS hsm_archive $f wait_request_state $fid ARCHIVE SUCCEED local end=$(date +%s) - local duration=$((end - start)) + # Add 1 to account for rounding errors between start and end (LU-8155) + local duration=$((end - start + 1)) [[ $duration -ge $goal ]] || error "Transfer is too fast $duration < $goal" @@ -1873,8 +1958,7 @@ test_24a() { [ $ctime0 -eq $ctime1 ] || error "release changed ctime from $ctime0 to $ctime1" - # Restore should not change atime or mtime and should not - # decrease ctime. + # Restore should not change any timestamps. $LFS hsm_restore $file wait_request_state $fid RESTORE SUCCEED @@ -2001,7 +2085,6 @@ test_24c() { chown $RUNAS_ID:nobody $file || error "cannot chown '$file' to '$RUNAS_ID:nobody'" - set_hsm_param user_request_mask "" $RUNAS $LFS hsm_$action $file && error "$action by user should fail" @@ -2015,7 +2098,6 @@ test_24c() { chown nobody:$RUNAS_GID $file || error "cannot chown '$file' to 'nobody:$RUNAS_GID'" - set_hsm_param group_request_mask "" $RUNAS $LFS hsm_$action $file && error "$action by group should fail" @@ -2029,7 +2111,6 @@ test_24c() { chown nobody:nobody $file || error "cannot chown '$file' to 'nobody:nobody'" - set_hsm_param other_request_mask "" $RUNAS $LFS hsm_$action $file && error "$action by other should fail" @@ -2192,7 +2273,7 @@ test_26() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -2232,7 +2313,7 @@ test_27b() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -2254,7 +2335,7 @@ test_28() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -2511,7 +2592,7 @@ test_31b() { local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -2535,7 +2616,7 @@ test_31c() { local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress_aligned $f) + fid=$(make_custom_file_for_progress $f 33 1048576) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -2559,7 +2640,7 @@ test_33() { local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -2626,7 +2707,7 @@ test_34() { local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -2662,7 +2743,7 @@ test_35() { local f=$DIR/$tdir/$tfile local f1=$DIR/$tdir/$tfile-1 local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return local fid1=$(copy_file /etc/passwd $f1) @@ -2701,7 +2782,7 @@ test_36() { local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -2865,7 +2946,7 @@ test_54() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_large_for_progress $f) + local fid=$(make_custom_file_for_progress $f 39 1000000) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2893,7 +2974,7 @@ test_55() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_large_for_progress $f) + local fid=$(make_custom_file_for_progress $f 39 1000000) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2922,7 +3003,7 @@ test_56() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || @@ -3034,6 +3115,10 @@ run_test 58 "Truncate a released file will trigger restore" test_59() { local fid + local server_version=$(lustre_version_code $SINGLEMDS) + [[ $server_version -lt $(version_code 2.7.63) ]] && + skip "Need MDS version at least 2.7.63" && return + copytool_setup $MCREATE $DIR/$tfile || error "mcreate failed" $TRUNCATE $DIR/$tfile 42 || error "truncate failed" @@ -3059,7 +3144,7 @@ test_60() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 10) [ $? != 0 ] && skip "not enough free space" && return local mdtidx=0 @@ -3110,10 +3195,14 @@ test_60() { local elapsed=$((finish_at - start_at)) # Ensure that the progress update occurred within the expected window. - if [ $elapsed -lt $interval ]; then + if [ $elapsed -lt $((interval - 1)) ]; then error "Expected progress update after at least $interval seconds" fi + echo "Wait for on going archive hsm action to complete" + wait_update $agent "grep -o copied $copytool_log" "copied" 10 || + echo "File archiving not completed even after 10 secs" + cdt_clear_no_retry copytool_cleanup } @@ -3178,7 +3267,7 @@ test_71() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || @@ -3456,7 +3545,7 @@ test_104() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return # if cdt is on, it can serve too quickly the request @@ -3499,66 +3588,6 @@ test_105() { } run_test 105 "Restart of coordinator" -get_agent_by_uuid_mdt() { - local uuid=$1 - local mdtidx=$2 - local mds=mds$(($mdtidx + 1)) - do_facet $mds "$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.agents |\ - grep $uuid" -} - -check_agent_registered_by_mdt() { - local uuid=$1 - local mdtidx=$2 - local mds=mds$(($mdtidx + 1)) - local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) - if [[ ! -z "$agent" ]]; then - echo "found agent $agent on $mds" - else - error "uuid $uuid not found in agent list on $mds" - fi -} - -check_agent_unregistered_by_mdt() { - local uuid=$1 - local mdtidx=$2 - local mds=mds$(($mdtidx + 1)) - local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) - if [[ -z "$agent" ]]; then - echo "uuid not found in agent list on $mds" - else - error "uuid found in agent list on $mds: $agent" - fi -} - -check_agent_registered() { - local uuid=$1 - local mdsno - for mdsno in $(seq 1 $MDSCOUNT); do - check_agent_registered_by_mdt $uuid $((mdsno - 1)) - done -} - -check_agent_unregistered() { - local uuid=$1 - local mdsno - for mdsno in $(seq 1 $MDSCOUNT); do - check_agent_unregistered_by_mdt $uuid $((mdsno - 1)) - done -} - -get_agent_uuid() { - local agent=${1:-$(facet_active_host $SINGLEAGT)} - - # Lustre mount-point is mandatory and last parameter on - # copytool cmd-line. - local mntpnt=$(do_rpc_nodes $agent pgrep -fl $HSMTOOL_BASE | - grep -v pgrep | awk '{print $NF}') - [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ - "parameter" - do_rpc_nodes $agent get_client_uuid $mntpnt | cut -d' ' -f2 -} - test_106() { # test needs a running copytool copytool_setup @@ -3784,7 +3813,7 @@ test_200() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_cancel $f) + fid=$(make_custom_file_for_progress $f 103 1048576) [ $? != 0 ] && skip "not enough free space" && return # test with cdt on is made in test_221 @@ -3832,7 +3861,7 @@ test_202() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -3882,7 +3911,7 @@ test_221() { local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_cancel $f) + fid=$(make_custom_file_for_progress $f 103 1048576) [ $? != 0 ] && skip "not enough free space" && return changelog_setup @@ -3991,7 +4020,7 @@ test_223b() { local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return changelog_setup @@ -4053,7 +4082,7 @@ test_225() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_progress $f) + fid=$(make_custom_file_for_progress $f 39 1000000) [ $? != 0 ] && skip "not enough free space" && return changelog_setup @@ -4258,7 +4287,7 @@ test_251() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_large_for_cancel $f) + fid=$(make_custom_file_for_progress $f 103 1048576) [ $? != 0 ] && skip "not enough free space" && return cdt_disable @@ -4567,6 +4596,66 @@ test_405() { } run_test 405 "archive and release under striped directory" +test_406() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] && + skip "need MDS version at least 2.7.64" && return 0 + + local fid + local mdt_index + + copytool_setup + mkdir -p $DIR/$tdir + fid=$(make_small $DIR/$tdir/$tfile) + echo "old fid $fid" + + $LFS hsm_archive $DIR/$tdir/$tfile + wait_request_state "$fid" ARCHIVE SUCCEED + $LFS hsm_release $DIR/$tdir/$tfile + + # Should migrate $tdir but not $tfile. + $LFS mv -M1 $DIR/$tdir && + error "migrating HSM an archived file should fail" + + $LFS hsm_restore $DIR/$tdir/$tfile + wait_request_state "$fid" RESTORE SUCCEED + + $LFS hsm_remove $DIR/$tdir/$tfile + wait_request_state "$fid" REMOVE SUCCEED + + cat $DIR/$tdir/$tfile > /dev/null || + error "cannot read $DIR/$tdir/$tfile" + + $LFS mv -M1 $DIR/$tdir || + error "cannot complete migration after HSM remove" + + mdt_index=$($LFS getstripe -M $DIR/$tdir) + if ((mdt_index != 1)); then + error "expected MDT index 1, got $mdt_index" + fi + + # Refresh fid after migration. + fid=$(path2fid $DIR/$tdir/$tfile) + echo "new fid $fid" + + $LFS hsm_archive $DIR/$tdir/$tfile + wait_request_state "$fid" ARCHIVE SUCCEED 1 + + lctl set_param debug=+trace + $LFS hsm_release $DIR/$tdir/$tfile || + error "cannot release $DIR/$tdir/$tfile" + + $LFS hsm_restore $DIR/$tdir/$tfile + wait_request_state "$fid" RESTORE SUCCEED 1 + + cat $DIR/$tdir/$tfile > /dev/null || + error "cannot read $DIR/$tdir/$tfile" + + copytool_cleanup +} +run_test 406 "attempting to migrate HSM archived files is safe" + test_500() { [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.92) ] &&