From 867ba433e3a0fce4a1b2f8d37a91d550ada41a26 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Tue, 22 Aug 2023 05:53:14 -0400 Subject: [PATCH] LU-17142 mgc: reconnection without pinger When the MGS has been offline for some time, the adaptive timeout (AT) is increased and the connection request deadline is high. Reconnecting via the pinger waits for the request deadline to expire before the next attempt. The situation is worse with a failover partner, when different connections are used. Reconnection could fail with a local MGS too. Here is the error when the MGC could not connect to a local MGS (MDT combined with MGS): LustreError: 15c-8: MGC90@kfi: Confguration from log kjlmo12-MDT0000 failed from MGS -5. The patch forces reconnection by invalidating the import and aborting inflight requests. ptlrpc_recover_import() stops waiting when the import is in the disconnected state, but a disconnect can happen between connection attempts and is a valid part of recovery. This is fixed. Reset the Adaptive Timeout when a local MGS starts. It allows the MGC to reconnect efficiently. mgs_barrier_gl_interpret_reply() should handle EINVAL from a client; it means the client doesn't have a lock. HPE-bug-id: LUS-11633 Signed-off-by: Alexander Boyko Change-Id: Ie631e04fb3e72900af076cf7f268f20f7b285445 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52498 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Andriy Skulysh Reviewed-by: Oleg Drokin --- lustre/mgc/mgc_request.c | 7 +++++ lustre/mgs/mgs_barrier.c | 2 +- lustre/ptlrpc/import.c | 34 +++++---------------- lustre/ptlrpc/pinger.c | 10 ++++-- lustre/ptlrpc/ptlrpc_internal.h | 4 +-- lustre/ptlrpc/recover.c | 68 ++++++++++++++++++----------------------- 6 files changed, 54 insertions(+), 71 deletions(-) diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 0d4777c..2814b7b 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -1047,6 +1047,13 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp, * 1. we previously got disconnected, * 2. 
value > 1 (at the same node with MGS) * */ + if (value > 1) { + struct adaptive_timeout *at; + + at = &imp->imp_at.iat_net_latency; + at_reset(at, INITIAL_CONNECT_TIMEOUT); + } + if (imp->imp_state == LUSTRE_IMP_DISCON || value > 1) ptlrpc_reconnect_import(imp); diff --git a/lustre/mgs/mgs_barrier.c b/lustre/mgs/mgs_barrier.c index 75ab5ae..e3a405c 100644 --- a/lustre/mgs/mgs_barrier.c +++ b/lustre/mgs/mgs_barrier.c @@ -62,7 +62,7 @@ static int mgs_barrier_gl_interpret_reply(const struct lu_env *env, ENTRY; if (rc) { - if (rc == -ENODEV) { + if (rc == -ENODEV || rc == -EINVAL) { /* The lock is useless, cancel it. */ ldlm_lock_cancel(ca->ca_lock); rc = 0; diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index f63eaf3..3a6bcac 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -456,31 +456,12 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt) int ptlrpc_reconnect_import(struct obd_import *imp) { -#ifdef CONFIG_LUSTRE_FS_PINGER - long timeout_jiffies = cfs_time_seconds(obd_timeout); - int rc; - - ptlrpc_pinger_force(imp); - - CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n", - obd2cli_tgt(imp->imp_obd), obd_timeout); + int rc = 0; + ENTRY; - rc = wait_event_idle_timeout(imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp), - timeout_jiffies); - if (rc == 0) - rc = -ETIMEDOUT; - else - rc = 0; - CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd), - ptlrpc_import_state_name(imp->imp_state)); - return rc; -#else - ptlrpc_set_import_discon(imp, 0, false); + ptlrpc_set_import_discon(imp, 0, true); /* Force a new connect attempt */ ptlrpc_invalidate_import(imp); - /* Do a fresh connect next time by zeroing the handle */ - ptlrpc_disconnect_import(imp, 1); /* Wait for all invalidate calls to finish */ if (atomic_read(&imp->imp_inval_count) > 0) { int rc; @@ -495,12 +476,11 @@ int ptlrpc_reconnect_import(struct obd_import *imp) /* Allow reconnect attempts */ imp->imp_obd->obd_no_recov = 0; - /* 
Remove 'invalid' flag */ - ptlrpc_activate_import(imp, false); + imp->imp_remote_handle.cookie = 0; /* Attempt a new connect */ - ptlrpc_recover_import(imp, NULL, 0); - return 0; -#endif + rc = ptlrpc_recover_import(imp, NULL, 0); + + RETURN(rc); } EXPORT_SYMBOL(ptlrpc_reconnect_import); diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 4c800c9..b122c42 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -254,10 +254,14 @@ static void ptlrpc_pinger_process_import(struct obd_import *imp, if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) { /* wait for a while before trying recovery again */ imp->imp_next_ping = ptlrpc_next_reconnect(imp); - spin_unlock(&imp->imp_lock); if (!imp->imp_no_pinger_recover || - imp->imp_connect_error == -EAGAIN) - ptlrpc_initiate_recovery(imp); + imp->imp_connect_error == -EAGAIN) { + CDEBUG(D_HA, "%s: starting recovery\n", + obd2cli_tgt(imp->imp_obd)); + ptlrpc_connect_import_locked(imp); + } else { + spin_unlock(&imp->imp_lock); + } } else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov || imp_is_deactive(imp)) { CDEBUG(D_HA, diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 4d68dca..4b1e15f 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -98,12 +98,12 @@ void ptlrpc_exit_portals(void); void ptlrpc_request_handle_notconn(struct ptlrpc_request *); void lustre_assert_wire_constants(void); -int ptlrpc_import_in_recovery(struct obd_import *imp); +bool ptlrpc_import_in_recovery(struct obd_import *imp); +bool ptlrpc_import_in_recovery_disconnect(struct obd_import *imp, bool d); int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt, bool invalid); void ptlrpc_handle_failed_import(struct obd_import *imp); int ptlrpc_replay_next(struct obd_import *imp, int *inflight); -void ptlrpc_initiate_recovery(struct obd_import *imp); int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset); int 
lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index d9ab8fc..565ec13 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -47,20 +47,6 @@ #include "ptlrpc_internal.h" /** - * Start recovery on disconnected import. - * This is done by just attempting a connect - */ -void ptlrpc_initiate_recovery(struct obd_import *imp) -{ - ENTRY; - - CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd)); - ptlrpc_connect_import(imp); - - EXIT; -} - -/** * Identify what request from replay list needs to be replayed next * (based on what we have already replayed) and send it to server. */ @@ -296,6 +282,23 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) } EXPORT_SYMBOL(ptlrpc_set_import_active); +bool ptlrpc_import_in_recovery_disconnect(struct obd_import *imp, + bool disconnect_is_recovery) +{ + bool in_recovery = true; + + spin_lock(&imp->imp_lock); + if (imp->imp_state < LUSTRE_IMP_DISCON || + (!disconnect_is_recovery && imp->imp_state == LUSTRE_IMP_DISCON) || + imp->imp_state >= LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov) + in_recovery = false; + spin_unlock(&imp->imp_lock); + + return in_recovery; +} + + /* Attempt to reconnect an import */ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async) { @@ -330,30 +333,28 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async) rc = -EALREADY; } spin_unlock(&imp->imp_lock); - if (rc) - GOTO(out, rc); - - CFS_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE); - rc = ptlrpc_connect_import(imp); - if (rc) - GOTO(out, rc); + if (!rc) { + CFS_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE); + rc = ptlrpc_connect_import(imp); + } - if (!async) { + if (!async && (rc == -EALREADY || rc == 0)) { long timeout = cfs_time_seconds(obd_timeout); - CDEBUG(D_HA, "%s: recovery started, waiting %u jiffies\n", + CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n", obd2cli_tgt(imp->imp_obd), 
obd_timeout); rc = wait_event_idle_timeout(imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp), - timeout); + !ptlrpc_import_in_recovery_disconnect(imp, true), + timeout); if (rc == 0) rc = -ETIMEDOUT; else rc = 0; - CDEBUG(D_HA, "%s: recovery finished\n", - obd2cli_tgt(imp->imp_obd)); + CDEBUG(D_HA, "%s: recovery finished %s, rc = %d\n", + obd2cli_tgt(imp->imp_obd), + ptlrpc_import_state_name(imp->imp_state), rc); } EXIT; @@ -362,16 +363,7 @@ out: } EXPORT_SYMBOL(ptlrpc_recover_import); -int ptlrpc_import_in_recovery(struct obd_import *imp) +bool ptlrpc_import_in_recovery(struct obd_import *imp) { - int in_recovery = 1; - - spin_lock(&imp->imp_lock); - if (imp->imp_state <= LUSTRE_IMP_DISCON || - imp->imp_state >= LUSTRE_IMP_FULL || - imp->imp_obd->obd_no_recov) - in_recovery = 0; - spin_unlock(&imp->imp_lock); - - return in_recovery; + return ptlrpc_import_in_recovery_disconnect(imp, false); } -- 1.8.3.1