From 6bdeda7afe92d61db56367875774fa074aaac0fd Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Fri, 16 Sep 2022 04:00:38 -0400 Subject: [PATCH] LU-16002 ptlrpc: reduce pinger eviction time On the server side, eviction is based on PING_INTERVAL. A client should be evicted after PING_EVICT_TIMEOUT, but the eviction logic adds an additional 3 * PING_INTERVAL on top of that. For a configuration with obd_timeout equal to 300, the addition is 225 seconds. The second-level timeout is needed when the network has been down for some time, and it prevents client evictions right after the first connection. This patch adds logic to check whether an import is active and, if it is, to evict the stale client faster, without the second-level timeout. This reduces the eviction timeout to PING_EVICT_TIMEOUT. replay-dual test_0a is based on a client eviction during recovery, so the lfs df check could fail because of the eviction. So complete the check in a way similar to recovery-small.sh. Test-Parameters: testlist=recovery-small env=RECOVERY_SMALL_EXCEPT=144 serverversion=2.14 HPE-bug-id: LUS-11054 Signed-off-by: Alexander Boyko Change-Id: I4d60046ef4737f9cf95a16ac0ab63a36859b8adc Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47928 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexander Zarochentsev Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/ptlrpc/service.c | 50 ++++++++++++++++++++++++++++-------------- lustre/tests/recovery-small.sh | 16 +++++++------- lustre/tests/replay-dual.sh | 14 +++++++----- 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 03694f1..4135de3 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1112,9 +1112,9 @@ static void ptlrpc_server_finish_active_request( */ void ptlrpc_update_export_timer(struct obd_export *exp, time64_t extra_delay) { - struct obd_export *oldest_exp; - time64_t oldest_time, new_time; - + struct obd_export *oldest_exp, *newest_exp; + time64_t oldest_time, current_time; + bool evict = false; ENTRY; 
LASSERT(exp); @@ -1128,11 +1128,12 @@ void ptlrpc_update_export_timer(struct obd_export *exp, time64_t extra_delay) */ /* Do not pay attention on 1sec or smaller renewals. */ - new_time = ktime_get_real_seconds() + extra_delay; - if (exp->exp_last_request_time + 1 /*second */ >= new_time) + current_time = ktime_get_real_seconds(); + /* 1 seconds */ + if (exp->exp_last_request_time + 1 >= current_time + extra_delay) RETURN_EXIT; - exp->exp_last_request_time = new_time; + exp->exp_last_request_time = current_time + extra_delay; /* * exports may get disconnected from the chain even though the @@ -1147,25 +1148,32 @@ void ptlrpc_update_export_timer(struct obd_export *exp, time64_t extra_delay) RETURN_EXIT; } + newest_exp = list_entry(exp->exp_obd->obd_exports_timed.prev, + struct obd_export, exp_obd_chain_timed); + list_move_tail(&exp->exp_obd_chain_timed, &exp->exp_obd->obd_exports_timed); + if (exp->exp_obd->obd_recovering) { + /* be nice to everyone during recovery */ + spin_unlock(&exp->exp_obd->obd_dev_lock); + RETURN_EXIT; + } + oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next, struct obd_export, exp_obd_chain_timed); + oldest_time = oldest_exp->exp_last_request_time; - spin_unlock(&exp->exp_obd->obd_dev_lock); - if (exp->exp_obd->obd_recovering) { - /* be nice to everyone during recovery */ - EXIT; - return; - } + /* Check if the oldest entry is expired. */ + if (exp->exp_obd->obd_eviction_timer == 0 && + current_time > oldest_time + PING_EVICT_TIMEOUT + extra_delay) { - /* Note - racing to start/reset the obd_eviction timer is safe */ - if (exp->exp_obd->obd_eviction_timer == 0) { - /* Check if the oldest entry is expired. */ - if (ktime_get_real_seconds() > - oldest_time + PING_EVICT_TIMEOUT + extra_delay) { + if (current_time < newest_exp->exp_last_request_time + + PING_EVICT_TIMEOUT / 2) { + /* If import is active - evict stale clients */ + evict = true; + } else { /* * We need a second timer, in case the net was down and * it just came back. 
Since the pinger may skip every @@ -1177,7 +1185,15 @@ void ptlrpc_update_export_timer(struct obd_export *exp, time64_t extra_delay) CDEBUG(D_HA, "%s: Think about evicting %s from %lld\n", exp->exp_obd->obd_name, obd_export_nid2str(oldest_exp), oldest_time); + } + } + + spin_unlock(&exp->exp_obd->obd_dev_lock); + + if (evict) { + /* Evict stale clients */ + ping_evictor_wake(exp); } else { if (ktime_get_real_seconds() > (exp->exp_obd->obd_eviction_timer + extra_delay)) { diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 9aeb52e..9130eb3 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1077,10 +1077,10 @@ test_26a() { # was test_26 bug 5921 - evict dead exports by pinger local before=$(date +%s) local rc=0 - # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict. + # evictor takes PING_EVICT_TIMEOUT to evict. # But if there's a race to start the evictor from various obds, # the loser might have to wait for the next ping. - sleep $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) + sleep $((TIMEOUT * 2)) do_facet client lctl set_param fail_loc=0x0 do_facet client lfs df > /dev/null @@ -1114,15 +1114,15 @@ test_26b() { # bug 10140 - evict dead exports by pinger # PING_INTERVAL max(obd_timeout / 4, 1U) # PING_EVICT_TIMEOUT (PING_INTERVAL * 6) - # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict. + # evictor takes PING_EVICT_TIMEOUT to evict. # But if there's a race to start the evictor from various obds, # the loser might have to wait for the next ping. 
- # = 9 * PING_INTERVAL + PING_INTERVAL - # = 10 PING_INTERVAL = 10 obd_timeout / 4 = 2.5 obd_timeout - # let's wait $((TIMEOUT * 3)) # bug 19887 - wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 3)) || + # = 6 * PING_INTERVAL + PING_INTERVAL + # = 7 PING_INTERVAL = 7 obd_timeout / 4 = (1+3/4)obd_timeout + # let's wait $((TIMEOUT * 2)) # bug 19887 + wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 2)) || error "Client was not evicted by ost" - wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 3)) || + wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 2)) || error "Client was not evicted by mds" } run_test 26b "evict dead exports" diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index e6af26f..551296e 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -63,13 +63,15 @@ test_0a() { $LCTL set_param fail_loc=0x80000514 facet_failover $SINGLEMDS [ -f "$LU482_FAILED" ] && skip "LU-482 failure" && return 0 - client_up || return 1 + client_up || (sleep 10; client_up) || (sleep 10; client_up) || + error "reconnect failed" umount -f $MOUNT2 - client_up || return 1 - zconf_mount `hostname` $MOUNT2 || error "mount2 fais" - unlinkmany $MOUNT1/$tfile- 50 || return 2 - rm $MOUNT2/$tfile || return 3 - rm $MOUNT2/$tfile-A || return 4 + client_up || (sleep 10; client_up) || (sleep 10; client_up) || + error "reconnect failed" + zconf_mount `hostname` $MOUNT2 || error "mount2 failed" + unlinkmany $MOUNT1/$tfile- 50 || error "unlinkmany failed" + rm $MOUNT2/$tfile || error "rm $MOUNT2/$tfile failed" + rm $MOUNT2/$tfile-A || error "rm $MOUNT2/$tfile-A failed" } run_test 0a "expired recovery with lost client" -- 1.8.3.1