Whamcloud - gitweb
LU-5079 tests: fix service_time in max_recovery_time() 14/12714/5
author Jian Yu <jian.yu@intel.com>
Fri, 14 Nov 2014 03:29:00 +0000 (19:29 -0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 27 Nov 2014 14:08:20 +0000 (14:08 +0000)
This patch fixes the calculation of service_time in
max_recovery_time() so that it matches the new method used in
check_and_start_recovery_timer() and the new values of
CONNECTION_SWITCH_MAX and CONNECTION_SWITCH_INC.

The patch also fixes replay-dual sub-tests:
- to call wait_clients_import_state() instead of sleeping
  for an arbitrary amount of time in test_11()
- to add some margin to the recovery time comparison
  in test_20()

Backport to b2_5 from master
Lustre-change: http://review.whamcloud.com/12724

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes \
mdtfilesystemtype=ldiskfs mdsfilesystemtype=ldiskfs \
ostfilesystemtype=ldiskfs mdtcount=1 \
testlist=replay-dual,replay-dual

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes \
mdtfilesystemtype=zfs mdsfilesystemtype=zfs \
ostfilesystemtype=zfs mdtcount=1 \
testlist=replay-dual,replay-dual

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes \
mdtfilesystemtype=ldiskfs mdsfilesystemtype=ldiskfs \
ostfilesystemtype=ldiskfs mdtcount=4 \
testlist=replay-dual,replay-dual

Signed-off-by: Jian Yu <jian.yu@intel.com>
Change-Id: I8ff0385c508dc4aea883f4159f3f9e55a216527c
Reviewed-on: http://review.whamcloud.com/12714
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
lustre/tests/replay-dual.sh
lustre/tests/test-framework.sh

index 3bc1b6c..1a12cd5 100755 (executable)
@@ -254,23 +254,25 @@ test_10() {
 run_test 10 "resending a replayed unlink"
 
 test_11() {
-    replay_barrier $SINGLEMDS
-    mcreate $MOUNT1/$tfile-1
-    mcreate $MOUNT2/$tfile-2
-    mcreate $MOUNT1/$tfile-3
-    mcreate $MOUNT2/$tfile-4
-    mcreate $MOUNT1/$tfile-5
-    # drop all reint replies for a while
-    do_facet $SINGLEMDS lctl set_param fail_loc=0x0119
-    # note that with this fail_loc set, facet_failover df will fail
-    facet_failover $SINGLEMDS
-    #sleep for while, let both clients reconnect and timeout
-    sleep $((TIMEOUT * 2))
-    do_facet $SINGLEMDS lctl set_param fail_loc=0
+       replay_barrier $SINGLEMDS
+       mcreate $MOUNT1/$tfile-1
+       mcreate $MOUNT2/$tfile-2
+       mcreate $MOUNT1/$tfile-3
+       mcreate $MOUNT2/$tfile-4
+       mcreate $MOUNT1/$tfile-5
+       # drop all reint replies for a while
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0119
+       # note that with this fail_loc set, facet_failover df will fail
+       facet_failover $SINGLEMDS
 
-    rm $MOUNT1/$tfile-[1-5] || return 1
+       local clients=${CLIENTS:-$HOSTNAME}
+       wait_clients_import_state "$clients" $SINGLEMDS FULL
 
-    return 0
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+       rm $MOUNT1/$tfile-[1-5] || return 1
+
+       return 0
 }
 run_test 11 "both clients timeout during replay"
 
@@ -485,9 +487,10 @@ test_20() { #16389
     rm $MOUNT1/a
     zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
     TIER2=$((`date +%s` - BEFORE))
-    [ $TIER2 -ge $((TIER1 * 2)) ] && \
-        error "recovery time is growing $TIER2 > $TIER1"
-    return 0
+
+       [[ $TIER2 -ge $((TIER1 * 2 + (TIER1 >> 2))) ]] &&
+               error "recovery time is growing $TIER2 > $TIER1"
+       return 0
 }
 run_test 20 "recovery time is not increasing"
 
index f1f2725..1145118 100644 (file)
@@ -5828,22 +5828,25 @@ do_ls () {
     return $rc
 }
 
-# target_start_and_reset_recovery_timer()
-#        service_time = at_est2timeout(service_time);
-#        service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC +
-#                             INITIAL_CONNECT_TIMEOUT);
-# CONNECTION_SWITCH_MAX : min(25U, max(CONNECTION_SWITCH_MIN,obd_timeout))
-#define CONNECTION_SWITCH_INC 1
+# check_and_start_recovery_timer()
+#      service_time = at_est2timeout(service_time);
+#      service_time += 2 * INITIAL_CONNECT_TIMEOUT;
+#      service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC);
+
 #define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
+#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN,obd_timeout))
 #define CONNECTION_SWITCH_MIN 5U
+#define CONNECTION_SWITCH_INC 5
+max_recovery_time() {
+       local init_connect_timeout=$(( TIMEOUT / 20 ))
+       [[ $init_connect_timeout -ge 5 ]] || init_connect_timeout=5
 
-max_recovery_time () {
-    local init_connect_timeout=$(( TIMEOUT / 20 ))
-    [[ $init_connect_timeout -ge 5 ]] || init_connect_timeout=5
-
-    local service_time=$(( $(at_max_get client) + $(( 2 * $(( 25 + 1  + init_connect_timeout)) )) ))
+       local service_time=$(at_max_get client)
+       service_time=$(( service_time + (service_time >> 2) + 5 ))
+       service_time=$(( service_time + $(( 2 * init_connect_timeout )) ))
+       service_time=$(( service_time + $(( 2 * $(( 50 + 5 )) )) ))
 
-    echo $service_time 
+       echo -n $service_time
 }
 
 get_clients_mount_count () {