From: Hongchao Zhang Date: Mon, 20 Oct 2014 16:28:17 +0000 (+0800) Subject: LU-3455 mdt: sync when evicting client X-Git-Tag: 2.6.92~8 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=fabd01968d0fac83e4855da5a5dfe8f68c038ceb LU-3455 mdt: sync when evicting client If a client is evicted from the MDT, the underlying OSD should be synchronized; otherwise recovery will fail, because the replay requests were dropped along with the client. In test_20b of replay-single.sh on ZFS, the occupied disk space is released only after the DMUs are committed, so the test should wait some more time before calling 'df' to get the disk usage info. Test-Parameters: alwaysuploadlogs \ envdefinitions=SLOW=yes,ENABLE_QUOTA=yes,ONLY=20b \ mdtfilesystemtype=zfs mdsfilesystemtype=zfs ostfilesystemtype=zfs \ clientcount=4 osscount=2 mdscount=2 \ mdssizegb=10 ostsizegb=10 austeroptions=-R failover=true iscsi=1 \ testlist=replay-single,replay-single,replay-single,replay-single,replay-single Change-Id: Ic126bd3c58a6d2da1c69c7231e88a8977417d37b Signed-off-by: Hongchao Zhang Reviewed-on: http://review.whamcloud.com/11767 Tested-by: Jenkins Reviewed-by: Nathaniel Clark Tested-by: Maloo Reviewed-by: Jian Yu Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index bb88fc4..3068765 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -436,9 +436,12 @@ test_20b() { # bug 10480 mds_evict_client client_up || client_up || true # reconnect + do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync 1" + fail $SINGLEMDS # start orphan recovery wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" wait_delete_completed_mds $wait_timeout || return 3 + AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }') log "before $BEFOREUSED, after $AFTERUSED" (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) && diff --git 
a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 8a273d0..3388723 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -2064,6 +2064,8 @@ sync_all_data() { wait_delete_completed_mds() { local MAX_WAIT=${1:-20} + # for ZFS, waiting more time for DMUs to be committed + local ZFS_WAIT=${2:-5} local mds2sync="" local stime=$(date +%s) local etime @@ -2099,6 +2101,14 @@ wait_delete_completed_mds() { if [[ $changes -eq 0 ]]; then etime=$(date +%s) #echo "delete took $((etime - stime)) seconds" + + # the occupied disk space will be released + # only after DMUs are committed + if [[ $(facet_fstype $SINGLEMDS) == zfs ]]; then + echo "sleep $ZFS_WAIT for ZFS OSD" + sleep $ZFS_WAIT + fi + return fi sleep 1