X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=f87ef93a3b6b73f4a07540a1f3e308a1f11027a4;hp=fc48b693129c743037b0094ab471c8515d9ee55e;hb=597214dc586ae9d329f2bb819600ac4b9ddfcc06;hpb=945c25419afee0935bff8619df45f8e3df13fa23 diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index fc48b69..f87ef93 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -13,12 +13,6 @@ export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin ONLY=${ONLY:-"$*"} # bug number for skipped test: LU-3815 ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36" -# bug number for skipped test:LU-5474 -ALWAYS_EXCEPT="$ALWAYS_EXCEPT 90" -# bug number for skipped test:LU-4178 -ALWAYS_EXCEPT="$ALWAYS_EXCEPT 200 221 223b" -# bug number for skipped test:LU-3852 -ALWAYS_EXCEPT="$ALWAYS_EXCEPT 251" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} @@ -216,7 +210,7 @@ copytool_setup() { if [[ -z "$arc_id" ]] && do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then - echo "Wakeup copytool $facet on $agent" + echo "Only wakeup running copytool $facet on $agent" return 0 fi @@ -273,10 +267,25 @@ copytool_cleanup() { local oldstate local mdt_hsmctrl local hsm_root=$(copytool_device $facet) + local end_wait=$(( SECONDS + TIMEOUT )) do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0 - sleep 1 - echo "Copytool is stopped on $agents" + + while (( SECONDS < end_wait )); do + sleep 2 + do_nodesv $agents "pgrep -x $HSMTOOL_BASE" + if [ $? -ne 0 ]; then + echo "Copytool is stopped on $agents" + break + fi + echo "Copytool still running on $agents" + done + if do_nodesv $agents "pgrep -x $HSMTOOL_BASE"; then + error "Copytool failed to stop in ${TIMEOUT}s ..." + else + echo "Copytool has stopped in " \ + "$((TIMEOUT - (end_wait - SECONDS)))s." + fi # clean all CDTs orphans requests from previous tests # that would otherwise need to timeout to clear. @@ -297,7 +306,9 @@ copytool_cleanup() { "$oldstate" 20 || error "mds${mdtno} cdt state is not $oldstate" done - do_facet $facet "rm -rf $hsm_root" + if do_facet $facet "df $hsm_root" >/dev/null 2>&1 ; then + do_facet $facet "rm -rf $hsm_root/*" + fi } copytool_suspend() { @@ -325,7 +336,7 @@ make_archive() { local file=$HSM_ARCHIVE/$1 do_facet $SINGLEAGT mkdir -p $(dirname $file) do_facet $SINGLEAGT dd if=/dev/urandom of=$file count=32 bs=1000000 || - error "cannot create $file" + file_creation_failure dd $file $? } copy2archive() { @@ -545,6 +556,15 @@ check_hsm_flags_user() { [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl" } +file_creation_failure() { + local cmd=$1 + local f=$2 + local err=$3 + + df $MOUNT $MOUNT2 + error "cannot create $f with $cmd, status=$err" +} + copy_file() { local f= @@ -558,20 +578,22 @@ copy_file() { f=${f/$DIR/$DIR2} fi rm -f $f - cp $1 $f || error "cannot copy $1 to $f" + cp $1 $f || file_creation_failure cp $f $? + path2fid $f || error "cannot get fid on $f" } make_small() { local file2=${1/$DIR/$DIR2} dd if=/dev/urandom of=$file2 count=2 bs=1M conv=fsync || - error "cannot create $file2" + file_creation_failure dd $file2 $? + path2fid $1 || error "cannot get fid on $1" } make_small_sync() { dd if=/dev/urandom of=$1 count=1 bs=1M conv=sync || - error "cannot create $1" + file_creation_failure dd $1 $? path2fid $1 || error "cannot get fid on $1" } @@ -581,14 +603,27 @@ cleanup_large_files() { [ $ratio -gt 50 ] && find $MOUNT -size +10M -exec rm -f {} \; } +check_enough_free_space() { + local nb=$1 + local unit=$2 + local need=$((nb * unit /1024)) + local free=$(df -kP $MOUNT | tail -1 | awk '{print $4}') + (( $need >= $free )) && return 1 + return 0 +} + make_large_for_striping() { local file2=${1/$DIR/$DIR2} local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1) cleanup_large_files + check_enough_free_space 5 $sz + [ $? != 0 ] && return $? + dd if=/dev/urandom of=$file2 count=5 bs=$sz conv=fsync || - error "cannot create $file2" + file_creation_failure dd $file2 $? + path2fid $1 || error "cannot get fid on $1" } @@ -597,12 +632,16 @@ make_large_for_progress() { cleanup_large_files + check_enough_free_space 39 1000000 + [ $? != 0 ] && return $? + # big file is large enough, so copy time is > 30s # so copytool make 1 progress # size is not a multiple of 1M to avoid stripe # aligment dd if=/dev/urandom of=$file2 count=39 bs=1000000 conv=fsync || - error "cannot create $file2" + file_creation_failure dd $file2 $? + path2fid $1 || error "cannot get fid on $1" } @@ -611,12 +650,15 @@ make_large_for_progress_aligned() { cleanup_large_files + check_enough_free_space 33 1048576 + [ $? != 0 ] && return $? + # big file is large enough, so copy time is > 30s # so copytool make 1 progress # size is a multiple of 1M to have stripe # aligment dd if=/dev/urandom of=$file2 count=33 bs=1M conv=fsync || - error "cannot create $file2" + file_creation_failure dd $file2 $? path2fid $1 || error "cannot get fid on $1" } @@ -625,9 +667,12 @@ make_large_for_cancel() { cleanup_large_files + check_enough_free_space 103 1048576 + [ $? != 0 ] && return $? + # Copy timeout is 100s. 105MB => 105s dd if=/dev/urandom of=$file2 count=103 bs=1M conv=fsync || - error "cannot create $file2" + file_creation_failure dd $file2 $? path2fid $1 || error "cannot get fid on $1" } @@ -812,7 +857,7 @@ test_3() { error "user could not change hsm flags" dd if=/etc/passwd of=$f.append bs=1 count=3\ conv=notrunc oflag=append status=noxfer || - error "could not append to test file" + file_creation_failure dd $f.append $? check_hsm_flags $f.append "0x00000003" # Modify a file sets it dirty @@ -821,7 +866,7 @@ test_3() { error "user could not change hsm flags" dd if=/dev/zero of=$f.modify bs=1 count=3\ conv=notrunc status=noxfer || - error "could not modify test file" + file_creation_failure dd $f.modify $? check_hsm_flags $f.modify "0x00000003" # Open O_TRUNC sets dirty @@ -1108,6 +1153,8 @@ test_12c() { local f=$DIR/$tdir/$tfile $LFS setstripe -c 2 $f local fid=$(make_large_for_striping $f) + [ $? != 0 ] && skip "not enough free space" && return + local FILE_CRC=$(md5sum $f) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -1983,6 +2030,8 @@ test_26() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2020,6 +2069,8 @@ test_27b() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -2039,6 +2090,8 @@ test_28() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2217,6 +2270,8 @@ test_31b() { local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -2238,6 +2293,8 @@ test_31c() { local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress_aligned $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -2259,6 +2316,8 @@ test_33() { local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -2323,6 +2382,8 @@ test_34() { local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -2356,6 +2417,8 @@ test_35() { local f=$DIR/$tdir/$tfile local f1=$DIR/$tdir/$tfile-1 local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + local fid1=$(copy_file /etc/passwd $f1) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2392,6 +2455,8 @@ test_36() { local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -2445,7 +2510,7 @@ test_40() { done # force copytool to use a local/temp archive dir to ensure best # performance vs remote/NFS mounts used in auto-tests - if df --local $HSM_ARCHIVE >/dev/null 2>&1 ; then + if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then copytool_setup else copytool_setup $SINGLEAGT $MOUNT $HSM_ARCHIVE_NUMBER $TMP/$tdir @@ -2585,6 +2650,7 @@ test_56() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2695,10 +2761,10 @@ run_test 58 "Truncate a released file will trigger restore" test_60() { # This test validates the fix for LU-4512. Ensure that the -u - # option changes the progress reporting interval from the default - # (30 seconds) to the user-specified interval. + # option changes the progress reporting interval from the + # default (30 seconds) to the user-specified interval. local interval=5 - local progress_timeout=$((interval * 3)) + local progress_timeout=$((interval * 4)) # test needs a new running copytool copytool_cleanup @@ -2707,15 +2773,21 @@ test_60() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) - - local start_at=$(date +%s) - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || - error "could not archive file" + [ $? != 0 ] && skip "not enough free space" && return local mdtidx=0 local mdt=${MDT_PREFIX}${mdtidx} local mds=mds$((mdtidx + 1)) + # Wait for copytool to register + wait_update_facet $mds \ + "$LCTL get_param -n ${mdt}.hsm.agents | grep -o ^uuid" \ + uuid 100 || error "coyptool failed to register with $mdt" + + local start_at=$(date +%s) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || + error "could not archive file" + local cmd="$LCTL get_param -n ${mdt}.hsm.active_requests" cmd+=" | awk '/'$fid'.*action=ARCHIVE/ {print \\\$12}' | cut -f2 -d=" @@ -2809,6 +2881,7 @@ test_71() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2990,7 +3063,14 @@ test_90() { fid=$(copy_file /etc/hosts $f.$i) echo $f.$i >> $FILELIST done - copytool_setup + # force copytool to use a local/temp archive dir to ensure best + # performance vs remote/NFS mounts used in auto-tests + if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then + copytool_setup + else + local dai=$(get_hsm_param default_archive_id) + copytool_setup $SINGLEAGT $MOUNT $dai $TMP/$tdir + fi # to be sure wait_all_done will not be mislead by previous tests cdt_purge wait_for_grace_delay @@ -3078,6 +3158,8 @@ test_104() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + # if cdt is on, it can serve too quickly the request cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f @@ -3389,6 +3471,8 @@ test_200() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_cancel $f) + [ $? != 0 ] && skip "not enough free space" && return + # test with cdt on is made in test_221 cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -3430,6 +3514,8 @@ test_202() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -3475,6 +3561,7 @@ test_221() { local f=$DIR/$tdir/$tfile local fid=$(make_large_for_cancel $f) + [ $? != 0 ] && skip "not enough free space" && return changelog_setup @@ -3582,6 +3669,7 @@ test_223b() { local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return changelog_setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -3642,6 +3730,7 @@ test_225() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_progress $f) + [ $? != 0 ] && skip "not enough free space" && return changelog_setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -3845,6 +3934,7 @@ test_251() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(make_large_for_cancel $f) + [ $? != 0 ] && skip "not enough free space" && return cdt_disable # to have a short test