Whamcloud - gitweb
LU-3455 mdt: sync when evicting client 31/13231/2
author Hongchao Zhang <hongchao.zhang@intel.com>
Sun, 4 Jan 2015 05:55:03 +0000 (21:55 -0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 15 Jan 2015 04:44:21 +0000 (04:44 +0000)
If a client is evicted from the MDT, the underlying OSD should be
synchronized; otherwise recovery will fail, because the replay
requests were dropped along with the client.

In test_20b in replay-single.sh, the occupied disk space is released
only after DMUs are committed for ZFS, so the test should wait some
more time before calling 'df' to get the disk usage info.

This patch is back-ported from the following one:
Lustre-commit: fabd01968d0fac83e4855da5a5dfe8f68c038ceb
Lustre-change: http://review.whamcloud.com/11767

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes,ONLY=20b \
mdtfilesystemtype=zfs mdsfilesystemtype=zfs ostfilesystemtype=zfs \
clientcount=4 osscount=2 mdscount=2 \
mdssizegb=10 ostsizegb=10 austeroptions=-R failover=true iscsi=1 \
testlist=replay-single,replay-single,replay-single,replay-single,replay-single

Signed-off-by: Hongchao Zhang <hongchao.zhang@intel.com>
Change-Id: Ia0416e57d009f2fa00c880c6ea41f9d0bd60fef0
Reviewed-on: http://review.whamcloud.com/13231
Tested-by: Jenkins
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Bob Glossman <bob.glossman@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/tests/replay-single.sh
lustre/tests/test-framework.sh

index 3086976..10dc4bd 100755 (executable)
@@ -440,9 +440,12 @@ test_20b() { # bug 10480
        mds_evict_client
        client_up || client_up || true    # reconnect
 
+       do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync 1"
+
        fail $SINGLEMDS                            # start orphan recovery
        wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
        wait_delete_completed_mds $wait_timeout || return 3
+
        AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
        log "before $BEFOREUSED, after $AFTERUSED"
        (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) &&
index 4b68a25..f51f8e8 100644 (file)
@@ -2003,6 +2003,8 @@ sync_all_data() {
 
 wait_delete_completed_mds() {
        local MAX_WAIT=${1:-20}
+       # for ZFS, waiting more time for DMUs to be committed
+       local ZFS_WAIT=${2:-5}
        local mds2sync=""
        local stime=$(date +%s)
        local etime
@@ -2038,6 +2040,14 @@ wait_delete_completed_mds() {
                if [[ $changes -eq 0 ]]; then
                        etime=$(date +%s)
                        #echo "delete took $((etime - stime)) seconds"
+
+                       # the occupied disk space will be released
+                       # only after DMUs are committed
+                       if [[ $(facet_fstype $SINGLEMDS) == zfs ]]; then
+                               echo "sleep $ZFS_WAIT for ZFS OSD"
+                               sleep $ZFS_WAIT
+                       fi
+
                        return
                fi
                sleep 1