From: Hongchao Zhang Date: Sun, 4 Jan 2015 05:55:03 +0000 (-0800) Subject: LU-3455 mdt: sync when evicting client X-Git-Tag: 2.5.3.90~22 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F31%2F13231%2F2;p=fs%2Flustre-release.git LU-3455 mdt: sync when evicting client If a client is evicted from the MDT, the underlying OSD should be synchronized; otherwise recovery will fail because the replay requests were dropped along with the client. In test_20b in replay-single.sh, the occupied disk space is released only after DMUs are committed for ZFS, so the test should wait some more time before calling 'df' to get the disk usage info. This patch is back-ported from the following one: Lustre-commit: fabd01968d0fac83e4855da5a5dfe8f68c038ceb Lustre-change: http://review.whamcloud.com/11767 Test-Parameters: alwaysuploadlogs \ envdefinitions=SLOW=yes,ENABLE_QUOTA=yes,ONLY=20b \ mdtfilesystemtype=zfs mdsfilesystemtype=zfs ostfilesystemtype=zfs \ clientcount=4 osscount=2 mdscount=2 \ mdssizegb=10 ostsizegb=10 austeroptions=-R failover=true iscsi=1 \ testlist=replay-single,replay-single,replay-single,replay-single,replay-single Signed-off-by: Hongchao Zhang Change-Id: Ia0416e57d009f2fa00c880c6ea41f9d0bd60fef0 Reviewed-on: http://review.whamcloud.com/13231 Tested-by: Jenkins Reviewed-by: Nathaniel Clark Tested-by: Maloo Reviewed-by: Bob Glossman Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 3086976..10dc4bd 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -440,9 +440,12 @@ test_20b() { # bug 10480 mds_evict_client client_up || client_up || true # reconnect + do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync 1" + fail $SINGLEMDS # start orphan recovery wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" wait_delete_completed_mds $wait_timeout || return 3 + AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }') log 
"before $BEFOREUSED, after $AFTERUSED" (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) && diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 4b68a25..f51f8e8 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -2003,6 +2003,8 @@ sync_all_data() { wait_delete_completed_mds() { local MAX_WAIT=${1:-20} + # for ZFS, waiting more time for DMUs to be committed + local ZFS_WAIT=${2:-5} local mds2sync="" local stime=$(date +%s) local etime @@ -2038,6 +2040,14 @@ wait_delete_completed_mds() { if [[ $changes -eq 0 ]]; then etime=$(date +%s) #echo "delete took $((etime - stime)) seconds" + + # the occupied disk space will be released + # only after DMUs are committed + if [[ $(facet_fstype $SINGLEMDS) == zfs ]]; then + echo "sleep $ZFS_WAIT for ZFS OSD" + sleep $ZFS_WAIT + fi + return fi sleep 1