LU-3455 mdt: sync when evicting client 67/11767/14
author     Hongchao Zhang <hongchao.zhang@intel.com>
           Mon, 20 Oct 2014 16:28:17 +0000 (00:28 +0800)
committer  Oleg Drokin <oleg.drokin@intel.com>
           Mon, 29 Dec 2014 17:20:36 +0000 (17:20 +0000)
If a client is evicted from the MDT, the underlying OSD should be
synchronized; otherwise recovery will fail, because the replay requests
were dropped along with the evicted client.
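
As a rough illustration (using the do_facet helper and the $SINGLEMDS facet
name from test-framework.sh), the sync added by this patch can also be forced
by hand on the MDS before restarting it:

    # ask the MDT's OSD to commit its pending transactions to disk, so the
    # state needed for orphan recovery survives dropping the evicted client
    do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync 1"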

In test_20b in replay-single.sh, the occupied disk space is released only
after the DMUs are committed on ZFS, so the test should wait some additional
time before calling 'df' to get the disk usage information.
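
A minimal sketch of that extra wait, assuming the facet_fstype helper and the
$SINGLEMDS/$DIR variables from the test framework, and a nominal 5-second
delay matching the default introduced below:

    # on ZFS the freed space becomes visible to 'df' only after the DMU
    # transaction group commits, so pause briefly before sampling usage
    if [[ $(facet_fstype $SINGLEMDS) == zfs ]]; then
            sleep 5
    fi
    AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')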

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes,ONLY=20b \
mdtfilesystemtype=zfs mdsfilesystemtype=zfs ostfilesystemtype=zfs \
clientcount=4 osscount=2 mdscount=2 \
mdssizegb=10 ostsizegb=10 austeroptions=-R failover=true iscsi=1 \
testlist=replay-single,replay-single,replay-single,replay-single,replay-single

Change-Id: Ic126bd3c58a6d2da1c69c7231e88a8977417d37b
Signed-off-by: Hongchao Zhang <hongchao.zhang@intel.com>
Reviewed-on: http://review.whamcloud.com/11767
Tested-by: Jenkins
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/tests/replay-single.sh
lustre/tests/test-framework.sh

lustre/tests/replay-single.sh
index bb88fc4..3068765 100755 (executable)
@@ -436,9 +436,12 @@ test_20b() { # bug 10480
        mds_evict_client
        client_up || client_up || true    # reconnect
 
+       do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync 1"
+
        fail $SINGLEMDS                            # start orphan recovery
        wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
        wait_delete_completed_mds $wait_timeout || return 3
+
        AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
        log "before $BEFOREUSED, after $AFTERUSED"
        (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) &&
lustre/tests/test-framework.sh
index 8a273d0..3388723 100755 (executable)
@@ -2064,6 +2064,8 @@ sync_all_data() {
 
 wait_delete_completed_mds() {
        local MAX_WAIT=${1:-20}
+       # for ZFS, waiting more time for DMUs to be committed
+       local ZFS_WAIT=${2:-5}
        local mds2sync=""
        local stime=$(date +%s)
        local etime
@@ -2099,6 +2101,14 @@ wait_delete_completed_mds() {
                if [[ $changes -eq 0 ]]; then
                        etime=$(date +%s)
                        #echo "delete took $((etime - stime)) seconds"
+
+                       # the occupied disk space will be released
+                       # only after DMUs are committed
+                       if [[ $(facet_fstype $SINGLEMDS) == zfs ]]; then
+                               echo "sleep $ZFS_WAIT for ZFS OSD"
+                               sleep $ZFS_WAIT
+                       fi
+
                        return
                fi
                sleep 1
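
With the new optional second argument, a test that needs a longer ZFS commit
window could (hypothetically) call the helper like this:

    # wait up to $wait_timeout for MDS-side deletes, allowing 10s for the
    # ZFS DMU commit instead of the default 5s
    wait_delete_completed_mds $wait_timeout 10 || return 3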