Whamcloud - gitweb
LU-4093 tests: prevent zombie requests when stopping CT
[fs/lustre-release.git] / lustre / tests / sanity-hsm.sh
index d3c9e89..8b89a18 100644 (file)
@@ -13,6 +13,8 @@ export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin
 ONLY=${ONLY:-"$*"}
 # bug number for skipped test:    3815     3939
 ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36 40"
+# bug number for skipped test:4178         4176
+ALWAYS_EXCEPT="$ALWAYS_EXCEPT 200 221 223b 31a"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
@@ -161,10 +163,34 @@ copytool_setup() {
 copytool_cleanup() {
        trap - EXIT
        local agents=${1:-$(facet_active_host $SINGLEAGT)}
+       local mdtno
+       local idx
+       local oldstate
+       local mdt_hsmctrl
 
        do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0
        sleep 1
        echo "Copytool is stopped on $agents"
+
+       # clean all CDTs orphans requests from previous tests
+       # that would otherwise need to timeout to clear.
+       for mdtno in $(seq 1 $MDSCOUNT); do
+               idx=$(($mdtno - 1))
+               mdt_hsmctrl="mdt.$FSNAME-MDT000${idx}.hsm_control"
+               oldstate=$(do_facet mds${mdtno} "$LCTL get_param -n " \
+                                  "$MDT_HSMCTRL")
+               # skip already stop[ed,ing] CDTs
+               echo $oldstate | grep stop || continue
+
+               do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=shutdown"
+               wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \
+                       "stopped" 20 ||
+                       error "mds${mdtno} cdt state is not stopped"
+               do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=$oldstate"
+               wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \
+                       "$oldstate" 20 ||
+                       error "mds${mdtno} cdt state is not $oldstate"
+       done
 }
 
 copytool_suspend() {
@@ -176,7 +202,7 @@ copytool_suspend() {
 
 copytool_remove_backend() {
        local fid=$1
-       local be=$(find $HSM_ARCHIVE -name $fid)
+       local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid)
        echo "Remove from backend: $fid = $be"
        do_facet $SINGLEAGT rm -f $be
 }
@@ -398,7 +424,8 @@ make_small() {
 }
 
 cleanup_large_files() {
-       local ratio=$(df $MOUNT |awk '{print $5}' |sed 's/%//g' |grep -v Use)
+       local ratio=$(df -P $MOUNT | tail -1 | awk '{print $5}' |
+                     sed 's/%//g')
        [ $ratio -gt 50 ] && find $MOUNT -size +10M -exec rm -f {} \;
 }
 
@@ -1079,8 +1106,8 @@ test_13() {
                        CURR_FILE="$CURR_DIR/$tfile.$f"
                        # write file-specific data
                        do_facet $SINGLEAGT \
-                               echo "d=$d, f=$f, dir=$CURR_DIR, "\
-                                    "file=$CURR_FILE" > $CURR_FILE
+                               "echo d=$d, f=$f, dir=$CURR_DIR, "\
+                                       "file=$CURR_FILE > $CURR_FILE"
                done
        done
        # import to Lustre
@@ -1824,6 +1851,41 @@ test_30b() {
 }
 run_test 30b "Restore at exec (release case)"
 
+test_30c() {
+       # Execute a released binary via do_node on $CLIENT2 and check that
+       # a write issued meanwhile fails and leaves the binary unchanged.
+       needclients 2 || return 0
+
+       # a running copytool is required
+       copytool_setup
+
+       mkdir -p $DIR/$tdir
+       local exe=$DIR/$tdir/SLEEP
+       local exe_fid=$(copy_file /bin/sleep $exe)
+       chmod 755 $exe
+       $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $exe
+       wait_request_state $exe_fid ARCHIVE SUCCEED
+       $LFS hsm_release $exe
+       check_hsm_flags $exe "0x0000000d"
+       # set no retry action mode
+       cdt_set_no_retry
+       do_node $CLIENT2 "$exe 10" &
+       local exec_pid=$!
+       sleep 3
+       # the write is issued while the background exec is still pending
+       echo 'Hi!' > $exe &&
+               error "Update during exec of released file must fail"
+       wait $exec_pid || error "Execution failed during run"
+       cmp /bin/sleep $exe || error "Binary overwritten during exec"
+
+       # cleanup: remove no retry action mode
+       cdt_clear_no_retry
+       check_hsm_flags $exe "0x00000009"
+
+       copytool_cleanup
+}
+run_test 30c "Update during exec of released file must fail"
+
 restore_and_check_size() {
        local f=$1
        local fid=$2