Whamcloud - gitweb
LU-1526 tests: Support for MDS-initiated OST_DESTROYs
author Li Wei <liwei@whamcloud.com>
Fri, 14 Dec 2012 08:34:21 +0000 (16:34 +0800)
committer Oleg Drokin <green@whamcloud.com>
Wed, 19 Dec 2012 19:08:29 +0000 (14:08 -0500)
This patch is backported from commit af5f388 of LU-1303 to
support interoperation with 2.4 servers.

The patch makes sure the tests work with MDSs that destroy OST
objects asynchronously on behalf of clients.

Signed-off-by: Li Wei <liwei@whamcloud.com>
Signed-off-by: Yu Jian <yujian@whamcloud.com>
Change-Id: I375d33b53e88a1649b68c1a3999d273d759d17b7
Reviewed-on: http://review.whamcloud.com/4832
Tested-by: Hudson
Reviewed-by: Li Wei <wei.g.li@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
lustre/tests/oos.sh
lustre/tests/replay-dual.sh
lustre/tests/replay-ost-single.sh
lustre/tests/replay-single.sh
lustre/tests/sanity.sh
lustre/tests/sanityn.sh
lustre/tests/test-framework.sh

index c547b63..7930e59 100755 (executable)
@@ -88,6 +88,8 @@ sync; sleep 1; sync
 
 sync; sleep 3; sync
 
+wait_delete_completed 300
+
 if [ $SUCCESS -eq 1 ]; then
        echo "Success!"
        rm -f $LOG
index 6f23e9a..1356afb 100755 (executable)
@@ -284,7 +284,7 @@ run_test 13 "close resend timeout"
 
 test_14b() {
     wait_mds_ost_sync
-    wait_destroy_complete
+    wait_delete_completed
     BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
     mkdir -p $MOUNT1/$tdir
     $SETSTRIPE -o 0 $MOUNT1/$tdir
@@ -306,7 +306,7 @@ test_14b() {
     zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
 
     wait_mds_ost_sync || return 4
-    wait_destroy_complete || return 5
+    wait_delete_completed || return 5
 
     AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
     log "before $BEFOREUSED, after $AFTERUSED"
index 6627e40..f7321c2 100755 (executable)
@@ -197,7 +197,7 @@ test_6() {
     sync
     # let the delete happen
     wait_mds_ost_sync || return 4
-    wait_destroy_complete || return 5
+    wait_delete_completed || return 5
     after=`kbytesfree`
     log "before: $before after: $after"
     (( $before <= $after + 40 )) || return 3   # take OST logs into account
@@ -224,7 +224,7 @@ test_7() {
     sync
     # let the delete happen
     wait_mds_ost_sync || return 4
-    wait_destroy_complete || return 5
+    wait_delete_completed || return 5
     after=`kbytesfree`
     log "before: $before after: $after"
     (( $before <= $after + 40 )) || return 3   # take OST logs into account
index 1670e15..46ca89e 100755 (executable)
@@ -2217,7 +2217,7 @@ test_89() {
         mkdir -p $DIR/$tdir
         rm -f $DIR/$tdir/$tfile
         wait_mds_ost_sync
-        wait_destroy_complete
+        wait_delete_completed
         BLOCKS1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
         lfs setstripe -i 0 -c 1 $DIR/$tdir/$tfile
         dd if=/dev/zero bs=1M count=10 of=$DIR/$tdir/$tfile
@@ -2230,6 +2230,7 @@ test_89() {
         zconf_mount $(hostname) $MOUNT
         client_up || return 1
         wait_mds_ost_sync
+        wait_delete_completed
         BLOCKS2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
         [ "$BLOCKS1" == "$BLOCKS2" ] || error $((BLOCKS2 - BLOCKS1)) blocks leaked
 }
index 1d2720d..24c248b 100644 (file)
@@ -1099,6 +1099,7 @@ reset_enospc() {
        [ "$OSTIDX" ] && list=$(facet_host ost$((OSTIDX + 1)))
 
        do_nodes $list lctl set_param fail_loc=0
+       sync    # initiate all OST_DESTROYs from MDS to OST
        sleep_maxage
 }
 
@@ -4840,6 +4841,7 @@ test_101d() {
 
     set_read_ahead $old_READAHEAD
     rm -f $file
+    wait_delete_completed
 
     [ $time_ra_ON -lt $time_ra_OFF ] ||
         error "read-ahead enabled  time read (${time_ra_ON}s) is more than
@@ -7079,6 +7081,7 @@ test_133c() {
        $LFS setstripe -c 1 -o 0 ${testdir}/${tfile}
        sync
        cancel_lru_locks osc
+       wait_delete_completed
 
        # clear stats.
        do_facet $SINGLEMDS $LCTL set_param mdt.*.md_stats=clear
@@ -7096,6 +7099,7 @@ test_133c() {
        check_stats ost "punch" 1
 
        rm -f ${testdir}/${tfile} || error "file remove failed"
+       wait_delete_completed
        check_stats ost "destroy" 1
 
        rm -rf $DIR/${tdir}
index 71db183..981691d 100644 (file)
@@ -322,6 +322,7 @@ run_test 14d "chmod of executing file is still possible ========"
 test_15() {    # bug 974 - ENOSPC
        echo "PATH=$PATH"
        sh oos2.sh $MOUNT1 $MOUNT2
+       wait_delete_completed
        grant_error=`dmesg | grep "> available"`
        [ -z "$grant_error" ] || error "$grant_error"
 }
@@ -612,6 +613,11 @@ run_test 31a "voluntary cancel / blocking ast race=============="
 test_31b() {
         remote_ost || { skip "local OST" && return 0; }
         remote_ost_nodsh && skip "remote OST w/o dsh" && return 0
+
+        # make sure there are no local locks due to destroy
+        wait_mds_ost_sync || error "wait_mds_ost_sync()"
+        wait_delete_completed || error "wait_delete_completed()"
+
         mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir"
         lfs setstripe $DIR/$tdir/$tfile -i 0 -c 1
         cp /etc/hosts $DIR/$tdir/$tfile
@@ -915,7 +921,7 @@ test_36() { #bug 16417
         rm -f $DIR1/$tdir/file000
         kill -USR1 $read_pid
         wait $read_pid
-        sleep 1
+        wait_delete_completed
         local after=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }')
         echo "*** cycle($i) *** before($before):after_dd($after_dd):after($after)"
         # this free space! not used
index d94010e..29b1ef6 100644 (file)
@@ -1503,23 +1503,55 @@ sync_all_data() {
     fi
 }
 
-wait_delete_completed () {
-    local TOTALPREV=`lctl get_param -n osc.*.kbytesavail | \
-                     awk 'BEGIN{total=0}; {total+=$1}; END{print total}'`
+wait_delete_completed_mds() {
+       [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.58) ] &&
+               return 0
+
+       local MAX_WAIT=${1:-20}
+       local mds2sync=""
+       local stime=`date +%s`
+       local etime
+       local node
+       local changes
+
+       # find MDS with pending deletions
+       for node in $(mdts_nodes); do
+               changes=$(do_node $node "lctl get_param -n osc.*MDT*.sync_*" \
+                       2>/dev/null | calc_sum)
+               if [ -z "$changes" ] || [ $changes -eq 0 ]; then
+                       continue
+               fi
+               mds2sync="$mds2sync $node"
+       done
+       if [ "$mds2sync" == "" ]; then
+               return
+       fi
+       mds2sync=$(comma_list $mds2sync)
+
+       # sync MDS transactions
+       do_nodes $mds2sync "lctl set_param -n osd*.*MD*.force_sync 1"
+
+       # wait till all changes are sent and committed by OSTs;
+       # for ldiskfs, space is released upon execution, but DMU
+       # does this upon commit
+
+       local WAIT=0
+       while [ "$WAIT" -ne "$MAX_WAIT" ]; do
+               changes=$(do_nodes $mds2sync "lctl get_param -n osc.*MDT*.sync_*" \
+                       | calc_sum)
+               #echo "$node: $changes changes on all"
+               if [ "$changes" -eq "0" ]; then
+                       etime=`date +%s`
+                       #echo "delete took $((etime - stime)) seconds"
+                       return
+               fi
+               sleep 1
+               WAIT=$(( WAIT + 1))
+       done
 
-    local WAIT=0
-    local MAX_WAIT=20
-    while [ "$WAIT" -ne "$MAX_WAIT" ]; do
-        sleep 1
-        TOTAL=`lctl get_param -n osc.*.kbytesavail | \
-               awk 'BEGIN{total=0}; {total+=$1}; END{print total}'`
-        [ "$TOTAL" -eq "$TOTALPREV" ] && return 0
-        echo "Waiting delete completed ... prev: $TOTALPREV current: $TOTAL "
-        TOTALPREV=$TOTAL
-        WAIT=$(( WAIT + 1))
-    done
-    echo "Delete is not completed in $MAX_WAIT sec"
-    return 1
+       etime=`date +%s`
+       echo "Delete is not completed in $((etime - stime)) seconds"
+       do_nodes $mds2sync "lctl get_param osc.*MDT*.sync_*"
 }
 
 wait_for_host() {
@@ -1613,28 +1645,35 @@ wait_mds_ost_sync () {
 }
 
 wait_destroy_complete () {
-    echo "Waiting for destroy to be done..."
-    # MAX value shouldn't be big as this mean server responsiveness
-    # never increase this just to make test pass but investigate
-    # why it takes so long time
-    local MAX=5
-    local WAIT=0
-    while [ $WAIT -lt $MAX ]; do
-        local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight))
-        local con=1
-        for ((i=0; i<${#RPCs[@]}; i++)); do
-            [ ${RPCs[$i]} -eq 0 ] && continue
-            # there are still some destroy RPCs in flight
-            con=0
-            break;
-        done
-        sleep 1
-        [ ${con} -eq 1 ] && return 0 # done waiting
-        echo "Waiting $WAIT secs for destroys to be done."
-        WAIT=$((WAIT + 1))
-    done
-    echo "Destroys weren't done in $MAX sec."
-    return 1
+       echo "Waiting for local destroys to complete"
+       # MAX value shouldn't be big, as it reflects server responsiveness;
+       # never increase it just to make a test pass, but investigate
+       # why the destroys take so long
+       local MAX=5
+       local WAIT=0
+       while [ $WAIT -lt $MAX ]; do
+               local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight))
+               local con=1
+               local i
+
+               for ((i=0; i<${#RPCs[@]}; i++)); do
+                       [ ${RPCs[$i]} -eq 0 ] && continue
+                       # there are still some destroy RPCs in flight
+                       con=0
+                       break;
+               done
+               sleep 1
+               [ ${con} -eq 1 ] && return 0 # done waiting
+               echo "Waiting ${WAIT}s for local destroys to complete"
+               WAIT=$((WAIT + 1))
+       done
+       echo "Local destroys weren't done in $MAX sec."
+       return 1
+}
+
+wait_delete_completed() {
+       wait_delete_completed_mds $1 || return $?
+       wait_destroy_complete
 }
 
 wait_exit_ST () {