Whamcloud - gitweb
LU-16002 ptlrpc: reduce pinger eviction time 28/47928/10
authorAlexander Boyko <alexander.boyko@hpe.com>
Fri, 16 Sep 2022 08:00:38 +0000 (04:00 -0400)
committerOleg Drokin <green@whamcloud.com>
Sat, 15 Oct 2022 05:54:34 +0000 (05:54 +0000)
On the server side, eviction is based on PING_INTERVAL. A client
should be evicted after PING_EVICT_TIMEOUT, but the eviction logic
adds an additional 3 * PING_INTERVAL on top of it. For a configuration
with obd_timeout equal to 300, the addition is 225 seconds.
The second-level timeout is needed when the network has been down
for some time. It also prevents client evictions after the first
connection.
This patch adds logic to check whether an import is active and,
if so, evicts the client faster, without the second level. This
reduces the eviction timeout to PING_EVICT_TIMEOUT.

replay-dual test_0a relies on a client eviction during recovery, so
the lfs df check could fail because of the eviction. Retry the check,
similar to recovery-small.sh.

Test-Parameters: testlist=recovery-small env=RECOVERY_SMALL_EXCEPT=144 serverversion=2.14
HPE-bug-id: LUS-11054
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I4d60046ef4737f9cf95a16ac0ab63a36859b8adc
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47928
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ptlrpc/service.c
lustre/tests/recovery-small.sh
lustre/tests/replay-dual.sh

index 03694f1..4135de3 100644 (file)
@@ -1112,9 +1112,9 @@ static void ptlrpc_server_finish_active_request(
  */
 void ptlrpc_update_export_timer(struct obd_export *exp, time64_t extra_delay)
 {
-       struct obd_export *oldest_exp;
-       time64_t oldest_time, new_time;
-
+       struct obd_export *oldest_exp, *newest_exp;
+       time64_t oldest_time, current_time;
+       bool    evict = false;
        ENTRY;
 
        LASSERT(exp);
@@ -1128,11 +1128,12 @@ void ptlrpc_update_export_timer(struct obd_export *exp, time64_t extra_delay)
         */
 
        /* Do not pay attention on 1sec or smaller renewals. */
-       new_time = ktime_get_real_seconds() + extra_delay;
-       if (exp->exp_last_request_time + 1 /*second */ >= new_time)
+       current_time = ktime_get_real_seconds();
+       /* 1 second */
+       if (exp->exp_last_request_time + 1 >= current_time + extra_delay)
                RETURN_EXIT;
 
-       exp->exp_last_request_time = new_time;
+       exp->exp_last_request_time = current_time + extra_delay;
 
        /*
         * exports may get disconnected from the chain even though the
@@ -1147,25 +1148,32 @@ void ptlrpc_update_export_timer(struct obd_export *exp, time64_t extra_delay)
                RETURN_EXIT;
        }
 
+       newest_exp = list_entry(exp->exp_obd->obd_exports_timed.prev,
+                               struct obd_export, exp_obd_chain_timed);
+
        list_move_tail(&exp->exp_obd_chain_timed,
                       &exp->exp_obd->obd_exports_timed);
 
+       if (exp->exp_obd->obd_recovering) {
+               /* be nice to everyone during recovery */
+               spin_unlock(&exp->exp_obd->obd_dev_lock);
+               RETURN_EXIT;
+       }
+
        oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next,
                                struct obd_export, exp_obd_chain_timed);
+
        oldest_time = oldest_exp->exp_last_request_time;
-       spin_unlock(&exp->exp_obd->obd_dev_lock);
 
-       if (exp->exp_obd->obd_recovering) {
-               /* be nice to everyone during recovery */
-               EXIT;
-               return;
-       }
+       /* Check if the oldest entry is expired. */
+       if (exp->exp_obd->obd_eviction_timer == 0 &&
+           current_time > oldest_time + PING_EVICT_TIMEOUT + extra_delay) {
 
-       /* Note - racing to start/reset the obd_eviction timer is safe */
-       if (exp->exp_obd->obd_eviction_timer == 0) {
-               /* Check if the oldest entry is expired. */
-               if (ktime_get_real_seconds() >
-                   oldest_time + PING_EVICT_TIMEOUT + extra_delay) {
+               if (current_time < newest_exp->exp_last_request_time +
+                            PING_EVICT_TIMEOUT / 2) {
+                       /* If import is active - evict stale clients */
+                       evict = true;
+               } else {
                        /*
                         * We need a second timer, in case the net was down and
                         * it just came back. Since the pinger may skip every
@@ -1177,7 +1185,15 @@ void ptlrpc_update_export_timer(struct obd_export *exp, time64_t extra_delay)
                        CDEBUG(D_HA, "%s: Think about evicting %s from %lld\n",
                               exp->exp_obd->obd_name,
                               obd_export_nid2str(oldest_exp), oldest_time);
+
                }
+       }
+
+       spin_unlock(&exp->exp_obd->obd_dev_lock);
+
+       if (evict) {
+               /* Evict stale clients */
+               ping_evictor_wake(exp);
        } else {
                if (ktime_get_real_seconds() >
                    (exp->exp_obd->obd_eviction_timer + extra_delay)) {
index 9aeb52e..9130eb3 100755 (executable)
@@ -1077,10 +1077,10 @@ test_26a() {      # was test_26 bug 5921 - evict dead exports by pinger
        local before=$(date +%s)
        local rc=0
 
-       # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
+       # evictor takes PING_EVICT_TIMEOUT to evict.
        # But if there's a race to start the evictor from various obds,
        # the loser might have to wait for the next ping.
-       sleep $((TIMEOUT * 2 + TIMEOUT * 3 / 4))
+       sleep $((TIMEOUT * 2))
        do_facet client lctl set_param fail_loc=0x0
        do_facet client lfs df > /dev/null
 
@@ -1114,15 +1114,15 @@ test_26b() {      # bug 10140 - evict dead exports by pinger
        # PING_INTERVAL max(obd_timeout / 4, 1U)
        # PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
 
-       # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
+       # evictor takes PING_EVICT_TIMEOUT to evict.
        # But if there's a race to start the evictor from various obds,
        # the loser might have to wait for the next ping.
-       # = 9 * PING_INTERVAL + PING_INTERVAL
-       # = 10 PING_INTERVAL = 10 obd_timeout / 4 = 2.5 obd_timeout
-       # let's wait $((TIMEOUT * 3)) # bug 19887
-       wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 3)) ||
+       # = 6 * PING_INTERVAL + PING_INTERVAL
+       # = 7 PING_INTERVAL = 7 obd_timeout / 4 =  (1+3/4)obd_timeout
+       # let's wait $((TIMEOUT * 2)) # bug 19887
+       wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 2)) ||
                error "Client was not evicted by ost"
-       wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 3)) ||
+       wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 2)) ||
                error "Client was not evicted by mds"
 }
 run_test 26b "evict dead exports"
index e6af26f..551296e 100755 (executable)
@@ -63,13 +63,15 @@ test_0a() {
        $LCTL set_param fail_loc=0x80000514
        facet_failover $SINGLEMDS
        [ -f "$LU482_FAILED" ] && skip "LU-482 failure" && return 0
-       client_up || return 1
+       client_up || (sleep 10; client_up) || (sleep 10; client_up) ||
+               error "reconnect failed"
        umount -f $MOUNT2
-       client_up || return 1
-       zconf_mount `hostname` $MOUNT2 || error "mount2 fais"
-       unlinkmany $MOUNT1/$tfile- 50 || return 2
-       rm $MOUNT2/$tfile || return 3
-       rm $MOUNT2/$tfile-A || return 4
+       client_up || (sleep 10; client_up) || (sleep 10; client_up) ||
+               error "reconnect failed"
+       zconf_mount `hostname` $MOUNT2 || error "mount2 failed"
+       unlinkmany $MOUNT1/$tfile- 50 || error "unlinkmany failed"
+       rm $MOUNT2/$tfile || error "rm $MOUNT2/$tfile failed"
+       rm $MOUNT2/$tfile-A || error "rm $MOUNT2/$tfile-A failed"
 }
 run_test 0a "expired recovery with lost client"