From: Jian Yu
Date: Fri, 14 Nov 2014 03:29:00 +0000 (-0800)
Subject: LU-5079 tests: fix service_time in max_recovery_time()
X-Git-Tag: 2.5.3.90~60
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=4e8def3e32ad76808a5b8336d43430a5318e20aa

LU-5079 tests: fix service_time in max_recovery_time()

This patch fixes the calculation of service_time in
max_recovery_time() to use the new method in
check_and_start_recovery_timer() and new values of
CONNECTION_SWITCH_MAX and CONNECTION_SWITCH_INC.

The patch also fixes replay-dual sub-tests:
- to call wait_clients_import_state() instead of sleeping
  uncertain time in test_11()
- to add some margin into the recovery time comparison
  in test_20()

Backport to b2_5 from master
Lustre-change: http://review.whamcloud.com/12724

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes \
mdtfilesystemtype=ldiskfs mdsfilesystemtype=ldiskfs \
ostfilesystemtype=ldiskfs mdtcount=1 \
testlist=replay-dual,replay-dual

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes \
mdtfilesystemtype=zfs mdsfilesystemtype=zfs \
ostfilesystemtype=zfs mdtcount=1 \
testlist=replay-dual,replay-dual

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes \
mdtfilesystemtype=ldiskfs mdsfilesystemtype=ldiskfs \
ostfilesystemtype=ldiskfs mdtcount=4 \
testlist=replay-dual,replay-dual

Signed-off-by: Jian Yu
Change-Id: I8ff0385c508dc4aea883f4159f3f9e55a216527c
Reviewed-on: http://review.whamcloud.com/12714
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Mike Pershin
Reviewed-by: Andreas Dilger
---

diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index 3bc1b6c..1a12cd5 100755
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -254,23 +254,25 @@ test_10() {
 run_test 10 "resending a replayed unlink"
 
 test_11() {
-    replay_barrier $SINGLEMDS
-    mcreate $MOUNT1/$tfile-1
-    mcreate $MOUNT2/$tfile-2
-    mcreate $MOUNT1/$tfile-3
-    mcreate $MOUNT2/$tfile-4
-    mcreate $MOUNT1/$tfile-5
-    # drop all reint replies for a while
-    do_facet $SINGLEMDS lctl set_param fail_loc=0x0119
-    # note that with this fail_loc set, facet_failover df will fail
-    facet_failover $SINGLEMDS
-    #sleep for while, let both clients reconnect and timeout
-    sleep $((TIMEOUT * 2))
-    do_facet $SINGLEMDS lctl set_param fail_loc=0
+	replay_barrier $SINGLEMDS
+	mcreate $MOUNT1/$tfile-1
+	mcreate $MOUNT2/$tfile-2
+	mcreate $MOUNT1/$tfile-3
+	mcreate $MOUNT2/$tfile-4
+	mcreate $MOUNT1/$tfile-5
+	# drop all reint replies for a while
+	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0119
+	# note that with this fail_loc set, facet_failover df will fail
+	facet_failover $SINGLEMDS
 
-    rm $MOUNT1/$tfile-[1-5] || return 1
+	local clients=${CLIENTS:-$HOSTNAME}
+	wait_clients_import_state "$clients" $SINGLEMDS FULL
 
-    return 0
+	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+	rm $MOUNT1/$tfile-[1-5] || return 1
+
+	return 0
 }
 run_test 11 "both clients timeout during replay"
 
@@ -485,9 +487,10 @@ test_20() { #16389
     rm $MOUNT1/a
     zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
     TIER2=$((`date +%s` - BEFORE))
-    [ $TIER2 -ge $((TIER1 * 2)) ] && \
-        error "recovery time is growing $TIER2 > $TIER1"
-    return 0
+
+	[[ $TIER2 -ge $((TIER1 * 2 + (TIER1 >> 2))) ]] &&
+		error "recovery time is growing $TIER2 > $TIER1"
+	return 0
 }
 run_test 20 "recovery time is not increasing"
 
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index f1f2725..1145118 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -5828,22 +5828,25 @@ do_ls () {
     return $rc
 }
 
-# target_start_and_reset_recovery_timer()
-#       service_time = at_est2timeout(service_time);
-#       service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC +
-#                            INITIAL_CONNECT_TIMEOUT);
-# CONNECTION_SWITCH_MAX : min(25U, max(CONNECTION_SWITCH_MIN,obd_timeout))
-#define CONNECTION_SWITCH_INC 1
+# check_and_start_recovery_timer()
+#	service_time = at_est2timeout(service_time);
+#	service_time += 2 * INITIAL_CONNECT_TIMEOUT;
+#	service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC);
+
 #define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
+#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN,obd_timeout))
 #define CONNECTION_SWITCH_MIN 5U
+#define CONNECTION_SWITCH_INC 5
+max_recovery_time() {
+	local init_connect_timeout=$(( TIMEOUT / 20 ))
+	[[ $init_connect_timeout -ge 5 ]] || init_connect_timeout=5
 
-max_recovery_time () {
-    local init_connect_timeout=$(( TIMEOUT / 20 ))
-    [[ $init_connect_timeout -ge 5 ]] || init_connect_timeout=5
-
-    local service_time=$(( $(at_max_get client) + $(( 2 * $(( 25 + 1 + init_connect_timeout)) )) ))
+	local service_time=$(at_max_get client)
+	service_time=$(( service_time + (service_time >> 2) + 5 ))
+	service_time=$(( service_time + $(( 2 * init_connect_timeout )) ))
+	service_time=$(( service_time + $(( 2 * $(( 50 + 5 )) )) ))
 
-    echo $service_time
+	echo -n $service_time
 }
 
 get_clients_mount_count () {