From: Andreas Dilger Date: Fri, 29 Jun 2012 05:22:42 +0000 (-0600) Subject: LU-1582 tests: do not force sync before failover X-Git-Tag: 2.2.92~23 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=5458e96d82a103315095c234c22de0efff7956af LU-1582 tests: do not force sync before failover Commit 17a69cf25ed0991e04d85c259f4294dc59734e1e forced sync on every target on a facet before it was failed, in order to fix failures in recovery-small.sh test_105() due to initial client connect failing. This was causing local-node testing to hang in replay-ost-single.sh test_8b() because the client mountpoint was being sync'd and would never finish due to a fail_loc preventing any RPCs from completing. Syncing the targets before failover also defeats the purpose of many recovery tests. Instead, recovery-small test_105 creates a file on the to-be-failed OST and writes to it, to trigger the exp_need_sync and commit the new client export, without incorrectly forcing every OST to be sync'd for every test that is using facet_failover(). Signed-off-by: Andreas Dilger Change-Id: Ied5f2ecf1f6523d4163916b24e5c4281902d500c Reviewed-on: http://review.whamcloud.com/3239 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Jinshan Xiong Reviewed-by: Niu Yawei --- diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 6a2dc96..ee03c9a 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1390,6 +1390,12 @@ test_105() local ir_state=$(check_cli_ir_state $rcli) [ $ir_state = OFF ] || error "IR state must be OFF at $rcli" + # Since the client just mounted, its last_rcvd entry is not on disk. + # Send an RPC so exp_need_sync forces last_rcvd to commit this export + # so the client can reconnect during OST recovery (LU-924, LU-1582) + $SETSTRIPE -i 0 $DIR/$tfile + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 conv=sync + # make sure MGS's state is Partial [ $(get_ir_status) = "partial" ] || error "MGS IR state must be partial" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index eb58b8e..eb3d249 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -1863,14 +1863,6 @@ facet_failover() { echo "Failing $facet on node $host" - # Make sure the client data is synced to disk. LU-924 - # - # We don't write client data synchrnously (to avoid flooding sync writes - # when there are many clients connecting), so if the server reboots before - # the client data reachs disk, the client data will be lost and the client - # will be evicted after recovery, which is not what we expected. - do_facet $facet "sync; sync; sync" - local affected=$(affected_facets $facet) shutdown_facet $facet