Whamcloud - gitweb
LU-17142 mgc: reconnection without pinger 98/52498/5
authorAlexander Boyko <alexander.boyko@hpe.com>
Tue, 22 Aug 2023 09:53:14 +0000 (05:53 -0400)
committerOleg Drokin <green@whamcloud.com>
Sat, 18 Nov 2023 21:41:53 +0000 (21:41 +0000)
When the MGS has been offline for some time, the Adaptive Timeout (AT)
increases and the connection request deadline becomes high. Reconnecting
with the pinger waits for the request deadline before the next attempt.
The situation is worse with a failover partner, when different
connections are used. Reconnection could fail with a local MGS too.

Here is the error reported when the MGC could not connect to a local MGS
(an MDT combined with the MGS).

LustreError: 15c-8: MGC90@kfi:
Confguration from log kjlmo12-MDT0000 failed from MGS -5.

The patch forces reconnection by invalidating the import and aborting
in-flight requests.

ptlrpc_recover_import() aborted its wait when the import was in the
disconnected state. However, a disconnect can happen between connection
attempts and is a valid state during recovery. This is fixed.

Reset the Adaptive Timeout when a local MGS starts. This allows the MGC
to reconnect efficiently.

mgs_barrier_gl_interpret_reply() should handle EINVAL from a client;
it means the client doesn't have the lock.

HPE-bug-id: LUS-11633
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: Ie631e04fb3e72900af076cf7f268f20f7b285445
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52498
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mgc/mgc_request.c
lustre/mgs/mgs_barrier.c
lustre/ptlrpc/import.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/recover.c

index 0d4777c..2814b7b 100644 (file)
@@ -1047,6 +1047,13 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
                 * 1. we previously got disconnected,
                 * 2. value > 1 (at the same node with MGS)
                 * */
+               if (value > 1) {
+                       struct adaptive_timeout *at;
+
+                       at = &imp->imp_at.iat_net_latency;
+                       at_reset(at, INITIAL_CONNECT_TIMEOUT);
+               }
+
                if (imp->imp_state == LUSTRE_IMP_DISCON || value > 1)
                        ptlrpc_reconnect_import(imp);
 
index 75ab5ae..e3a405c 100644 (file)
@@ -62,7 +62,7 @@ static int mgs_barrier_gl_interpret_reply(const struct lu_env *env,
        ENTRY;
 
        if (rc) {
-               if (rc == -ENODEV) {
+               if (rc == -ENODEV || rc == -EINVAL) {
                        /* The lock is useless, cancel it. */
                        ldlm_lock_cancel(ca->ca_lock);
                        rc = 0;
index f63eaf3..3a6bcac 100644 (file)
@@ -456,31 +456,12 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
 
 int ptlrpc_reconnect_import(struct obd_import *imp)
 {
-#ifdef CONFIG_LUSTRE_FS_PINGER
-       long timeout_jiffies = cfs_time_seconds(obd_timeout);
-       int rc;
-
-       ptlrpc_pinger_force(imp);
-
-       CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
-              obd2cli_tgt(imp->imp_obd), obd_timeout);
+       int rc = 0;
+       ENTRY;
 
-       rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
-                                    !ptlrpc_import_in_recovery(imp),
-                                    timeout_jiffies);
-       if (rc == 0)
-               rc = -ETIMEDOUT;
-       else
-               rc = 0;
-       CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd),
-              ptlrpc_import_state_name(imp->imp_state));
-       return rc;
-#else
-       ptlrpc_set_import_discon(imp, 0, false);
+       ptlrpc_set_import_discon(imp, 0, true);
        /* Force a new connect attempt */
        ptlrpc_invalidate_import(imp);
-       /* Do a fresh connect next time by zeroing the handle */
-       ptlrpc_disconnect_import(imp, 1);
        /* Wait for all invalidate calls to finish */
        if (atomic_read(&imp->imp_inval_count) > 0) {
                int rc;
@@ -495,12 +476,11 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
 
        /* Allow reconnect attempts */
        imp->imp_obd->obd_no_recov = 0;
-       /* Remove 'invalid' flag */
-       ptlrpc_activate_import(imp, false);
+       imp->imp_remote_handle.cookie = 0;
        /* Attempt a new connect */
-       ptlrpc_recover_import(imp, NULL, 0);
-       return 0;
-#endif
+       rc = ptlrpc_recover_import(imp, NULL, 0);
+
+       RETURN(rc);
 }
 EXPORT_SYMBOL(ptlrpc_reconnect_import);
 
index 4c800c9..b122c42 100644 (file)
@@ -254,10 +254,14 @@ static void ptlrpc_pinger_process_import(struct obd_import *imp,
        if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
                /* wait for a while before trying recovery again */
                imp->imp_next_ping = ptlrpc_next_reconnect(imp);
-               spin_unlock(&imp->imp_lock);
                if (!imp->imp_no_pinger_recover ||
-                   imp->imp_connect_error == -EAGAIN)
-                       ptlrpc_initiate_recovery(imp);
+                   imp->imp_connect_error == -EAGAIN) {
+                       CDEBUG(D_HA, "%s: starting recovery\n",
+                              obd2cli_tgt(imp->imp_obd));
+                       ptlrpc_connect_import_locked(imp);
+               } else {
+                       spin_unlock(&imp->imp_lock);
+               }
        } else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov ||
                   imp_is_deactive(imp)) {
                CDEBUG(D_HA,
index 4d68dca..4b1e15f 100644 (file)
@@ -98,12 +98,12 @@ void ptlrpc_exit_portals(void);
 
 void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
 void lustre_assert_wire_constants(void);
-int ptlrpc_import_in_recovery(struct obd_import *imp);
+bool ptlrpc_import_in_recovery(struct obd_import *imp);
+bool ptlrpc_import_in_recovery_disconnect(struct obd_import *imp, bool d);
 int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt,
                             bool invalid);
 void ptlrpc_handle_failed_import(struct obd_import *imp);
 int ptlrpc_replay_next(struct obd_import *imp, int *inflight);
-void ptlrpc_initiate_recovery(struct obd_import *imp);
 
 int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset);
 int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset);
index d9ab8fc..565ec13 100644 (file)
 #include "ptlrpc_internal.h"
 
 /**
- * Start recovery on disconnected import.
- * This is done by just attempting a connect
- */
-void ptlrpc_initiate_recovery(struct obd_import *imp)
-{
-        ENTRY;
-
-        CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
-        ptlrpc_connect_import(imp);
-
-        EXIT;
-}
-
-/**
  * Identify what request from replay list needs to be replayed next
  * (based on what we have already replayed) and send it to server.
  */
@@ -296,6 +282,23 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
 }
 EXPORT_SYMBOL(ptlrpc_set_import_active);
 
+bool ptlrpc_import_in_recovery_disconnect(struct obd_import *imp,
+                                         bool disconnect_is_recovery)
+{
+       bool in_recovery = true;
+
+       spin_lock(&imp->imp_lock);
+       if (imp->imp_state < LUSTRE_IMP_DISCON ||
+           (!disconnect_is_recovery && imp->imp_state == LUSTRE_IMP_DISCON) ||
+           imp->imp_state >= LUSTRE_IMP_FULL ||
+           imp->imp_obd->obd_no_recov)
+               in_recovery = false;
+       spin_unlock(&imp->imp_lock);
+
+       return in_recovery;
+}
+
+
 /* Attempt to reconnect an import */
 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
 {
@@ -330,30 +333,28 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
                rc = -EALREADY;
        }
        spin_unlock(&imp->imp_lock);
-       if (rc)
-               GOTO(out, rc);
-
-       CFS_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE);
 
-       rc = ptlrpc_connect_import(imp);
-       if (rc)
-               GOTO(out, rc);
+       if (!rc) {
+               CFS_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE);
+               rc = ptlrpc_connect_import(imp);
+       }
 
-       if (!async) {
+       if (!async && (rc == -EALREADY || rc == 0)) {
                long timeout = cfs_time_seconds(obd_timeout);
 
-               CDEBUG(D_HA, "%s: recovery started, waiting %u jiffies\n",
+               CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
                       obd2cli_tgt(imp->imp_obd), obd_timeout);
 
                rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
-                                            !ptlrpc_import_in_recovery(imp),
-                                            timeout);
+                       !ptlrpc_import_in_recovery_disconnect(imp, true),
+                       timeout);
                if (rc == 0)
                        rc = -ETIMEDOUT;
                else
                        rc = 0;
-               CDEBUG(D_HA, "%s: recovery finished\n",
-                      obd2cli_tgt(imp->imp_obd));
+               CDEBUG(D_HA, "%s: recovery finished %s, rc = %d\n",
+                      obd2cli_tgt(imp->imp_obd),
+                      ptlrpc_import_state_name(imp->imp_state), rc);
        }
        EXIT;
 
@@ -362,16 +363,7 @@ out:
 }
 EXPORT_SYMBOL(ptlrpc_recover_import);
 
-int ptlrpc_import_in_recovery(struct obd_import *imp)
+bool ptlrpc_import_in_recovery(struct obd_import *imp)
 {
-       int in_recovery = 1;
-
-       spin_lock(&imp->imp_lock);
-       if (imp->imp_state <= LUSTRE_IMP_DISCON ||
-           imp->imp_state >= LUSTRE_IMP_FULL ||
-           imp->imp_obd->obd_no_recov)
-               in_recovery = 0;
-       spin_unlock(&imp->imp_lock);
-
-       return in_recovery;
+       return ptlrpc_import_in_recovery_disconnect(imp, false);
 }