X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=8b89a1852ffed17dc83464725bbe9729d766cc96;hp=37a66ce30d69191ee2ce836a51beb190e18ea9fe;hb=0ec1dbbea450375af070838ce6487e14c6595c83;hpb=57118830eb55ab43b4b6fc096ad40b2bd83c6de3

Review notes (text before "diff --git" is ignored by patch tools):
  * copytool_cleanup(): the hsm_control state is now read through the
    local $mdt_hsmctrl (the patch referenced $MDT_HSMCTRL, which is never
    assigned), and coordinators that are already stopped/stopping are now
    skipped ("grep stop && continue"; the patch had "|| continue", which
    skipped the running ones instead).
  * test_30c(): comment typo "no try" corrected to "no retry".
  * Because added lines were edited, the post-image blob id in the
    "index" line and in the URL above describes the upstream version,
    not the result of applying this copy.
  * Whitespace was reconstructed from a flattened copy; apply with
    whitespace-insensitive matching if context does not match exactly.

diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh
index 37a66ce..8b89a18 100644
--- a/lustre/tests/sanity-hsm.sh
+++ b/lustre/tests/sanity-hsm.sh
@@ -11,12 +11,11 @@ SRCDIR=$(dirname $0)
 export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin
 
 ONLY=${ONLY:-"$*"}
-# bug number for skipped test:
+# bug number for skipped test: 3815 3939
+ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36 40"
+# bug number for skipped test:4178 4176
+ALWAYS_EXCEPT="$ALWAYS_EXCEPT 200 221 223b 31a"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-# skip test cases failed before landing - Jinshan
-
-ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 31a 34 35 36"
-ALWAYS_EXCEPT="$ALWAYS_EXCEPT 200 201 221 223a 223b 225"
 
 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
 
@@ -164,10 +163,34 @@ copytool_setup() {
 copytool_cleanup() {
 	trap - EXIT
 	local agents=${1:-$(facet_active_host $SINGLEAGT)}
+	local mdtno
+	local idx
+	local oldstate
+	local mdt_hsmctrl
 
 	do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0
 	sleep 1
 	echo "Copytool is stopped on $agents"
+
+	# Restart the coordinator (CDT) of every MDT to clean orphan requests
+	# from previous tests that would otherwise need to timeout to clear.
+	for mdtno in $(seq 1 $MDSCOUNT); do
+		idx=$(($mdtno - 1))
+		mdt_hsmctrl="mdt.$FSNAME-MDT000${idx}.hsm_control"
+		oldstate=$(do_facet mds${mdtno} "$LCTL get_param -n " \
+				   "$mdt_hsmctrl")
+		# skip already stop[ed,ing] CDTs: nothing to shut down/restore
+		echo "$oldstate" | grep -q stop && continue
+
+		do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=shutdown"
+		wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \
+			"stopped" 20 ||
+			error "mds${mdtno} cdt state is not stopped"
+		do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=$oldstate"
+		wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \
+			"$oldstate" 20 ||
+			error "mds${mdtno} cdt state is not $oldstate"
+	done
 }
 
 copytool_suspend() {
@@ -179,7 +202,7 @@ copytool_suspend() {
 
 copytool_remove_backend() {
 	local fid=$1
-	local be=$(find $HSM_ARCHIVE -name $fid)
+	local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid)
 	echo "Remove from backend: $fid = $be"
 	do_facet $SINGLEAGT rm -f $be
 }
@@ -401,7 +424,8 @@ make_small() {
 }
 
 cleanup_large_files() {
-	local ratio=$(df $MOUNT |awk '{print $5}' |sed 's/%//g' |grep -v Use)
+	local ratio=$(df -P $MOUNT | tail -1 | awk '{print $5}' |
+		      sed 's/%//g')
 	[ $ratio -gt 50 ] && find $MOUNT -size +10M -exec rm -f {} \;
 }
 
@@ -466,7 +490,7 @@ wait_request_state() {
 	local request=$2
 	local state=$3
 
-	local cmd="$LCTL get_param -n $HSM_PARAM.agent_actions"
+	local cmd="$LCTL get_param -n $HSM_PARAM.actions"
 	cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
 
 	wait_result $SINGLEMDS "$cmd" $state 100 ||
@@ -477,7 +501,7 @@ get_request_state() {
 	local fid=$1
 	local request=$2
 
-	do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |"\
+	do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.actions |"\
 	"awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
 }
 
@@ -485,14 +509,14 @@ get_request_count() {
 	local fid=$1
 	local request=$2
 
-	do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |"\
+	do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.actions |"\
 	"awk -vn=0 '/'$fid'.*action='$request'/ {n++}; END {print n}'"
 }
 
 wait_all_done() {
 	local timeout=$1
 
-	local cmd="$LCTL get_param -n $HSM_PARAM.agent_actions"
+	local cmd="$LCTL get_param -n $HSM_PARAM.actions"
 	cmd+=" | egrep 'WAITING|STARTED'"
 
 	wait_result $SINGLEMDS "$cmd" "" $timeout ||
@@ -1082,8 +1106,8 @@ test_13() {
 			CURR_FILE="$CURR_DIR/$tfile.$f"
 			# write file-specific data
 			do_facet $SINGLEAGT \
-				echo "d=$d, f=$f, dir=$CURR_DIR, "\
-					"file=$CURR_FILE" > $CURR_FILE
+				"echo d=$d, f=$f, dir=$CURR_DIR, "\
+					"file=$CURR_FILE > $CURR_FILE"
 		done
 	done
 	# import to Lustre
@@ -1827,6 +1851,41 @@ test_30b() {
 }
 run_test 30b "Restore at exec (release case)"
 
+test_30c() {
+	needclients 2 || return 0
+
+	# test needs a running copytool
+	copytool_setup
+
+	mkdir -p $DIR/$tdir
+	local f=$DIR/$tdir/SLEEP
+	local fid=$(copy_file /bin/sleep $f)
+	chmod 755 $f
+	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+	wait_request_state $fid ARCHIVE SUCCEED
+	$LFS hsm_release $f
+	check_hsm_flags $f "0x0000000d"
+	# set no retry action mode
+	cdt_set_no_retry
+	do_node $CLIENT2 "$f 10" &
+	local pid=$!
+	sleep 3
+	echo 'Hi!' > $f
+	[[ $? == 0 ]] && error "Update during exec of released file must fail"
+	wait $pid
+	[[ $? == 0 ]] || error "Execution failed during run"
+	cmp /bin/sleep $f
+	[[ $? == 0 ]] || error "Binary overwritten during exec"
+
+	# cleanup
+	# remove no retry action mode
+	cdt_clear_no_retry
+	check_hsm_flags $f "0x00000009"
+
+	copytool_cleanup
+}
+run_test 30c "Update during exec of released file must fail"
+
 restore_and_check_size() {
 	local f=$1
 	local fid=$2
@@ -2382,7 +2441,7 @@ double_verify_reset_hsm_param() {
 test_100() {
 	double_verify_reset_hsm_param loop_period
 	double_verify_reset_hsm_param grace_delay
-	double_verify_reset_hsm_param request_timeout
+	double_verify_reset_hsm_param active_request_timeout
 	double_verify_reset_hsm_param max_requests
 	double_verify_reset_hsm_param default_archive_id
 }
@@ -2412,7 +2471,7 @@ test_103() {
 
 	echo "Current requests"
 	local res=$(do_facet $SINGLEMDS "$LCTL get_param -n\
-			$HSM_PARAM.agent_actions |\
+			$HSM_PARAM.actions |\
 			grep -v CANCELED | grep -v SUCCEED | grep -v FAILED")
 
 	[[ -z "$res" ]] || error "Some request have not been canceled"
@@ -2434,7 +2493,7 @@ test_104() {
 	cdt_disable
 	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f
 	local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\
-			$HSM_PARAM.agent_actions |\
+			$HSM_PARAM.actions |\
 			grep $fid | cut -f16 -d=")
 	cdt_enable
 
@@ -2455,12 +2514,12 @@ test_105() {
 		$LFS hsm_archive $DIR/$tdir/$i
 	done
 	local reqcnt1=$(do_facet $SINGLEMDS "$LCTL get_param -n\
-			$HSM_PARAM.agent_actions |\
+			$HSM_PARAM.actions |\
 			grep WAITING | wc -l")
 	cdt_restart
 	cdt_disable
 	local reqcnt2=$(do_facet $SINGLEMDS "$LCTL get_param -n\
-			$HSM_PARAM.agent_actions |\
+			$HSM_PARAM.actions |\
 			grep WAITING | wc -l")
 	cdt_enable
 	cdt_purge
@@ -3133,12 +3192,12 @@ test_250() {
 	while [[ $cnt != 0 || $wt != 0 ]]; do
 		sleep 1
 		cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\
-			$HSM_PARAM.agent_actions |\
+			$HSM_PARAM.actions |\
 			grep STARTED | grep -v CANCEL | wc -l")
 		[[ $cnt -le $maxrequest ]] ||
 			error "$cnt > $maxrequest too many started requests"
 		wt=$(do_facet $SINGLEMDS "$LCTL get_param\
-			$HSM_PARAM.agent_actions |\
+			$HSM_PARAM.actions |\
 			grep WAITING | wc -l")
 		echo "max=$maxrequest started=$cnt waiting=$wt"
 	done
@@ -3157,8 +3216,8 @@ test_251() {
 
 	cdt_disable
 	# to have a short test
-	local old_to=$(get_hsm_param request_timeout)
-	set_hsm_param request_timeout 4
+	local old_to=$(get_hsm_param active_request_timeout)
+	set_hsm_param active_request_timeout 4
 	# to be sure the cdt will wake up frequently so
 	# it will be able to cancel the "old" request
 	local old_loop=$(get_hsm_param loop_period)
@@ -3170,7 +3229,7 @@ test_251() {
 	sleep 5
 	wait_request_state $fid ARCHIVE CANCELED
 
-	set_hsm_param request_timeout $old_to
+	set_hsm_param active_request_timeout $old_to
 	set_hsm_param loop_period $old_loop
 
 	copytool_cleanup