Whamcloud - gitweb
LU-17142 mgc: reconnection without pinger
authorAlexander Boyko <alexander.boyko@hpe.com>
Tue, 22 Aug 2023 09:53:14 +0000 (05:53 -0400)
committerAndreas Dilger <adilger@whamcloud.com>
Fri, 24 Nov 2023 09:25:17 +0000 (09:25 +0000)
When the MGS has been offline for some time, the Adaptive Timeout (AT)
is increased and the connection request deadline is high. Reconnecting
via the pinger waits for the request deadline before the next attempt.
The situation is worse with a failover partner, when different
connections are used. Reconnection can fail with a local MGS too.

Here is the error reported when the MGC could not connect to a local MGS
(MDT combined with MGS).

    LustreError: 15c-8: MGC90@kfi:
    Confguration from log kjlmo12-MDT0000 failed from MGS -5.

The patch forces reconnection by invalidating the import, which aborts
in-flight requests.

ptlrpc_recover_import() aborted its wait when the import was in the
disconnect state. But the disconnect state occurs between connection
attempts and is valid there. This is fixed.

Reset the Adaptive Timeout when a local MGS starts. This allows the MGC
to reconnect efficiently.

mgs_barrier_gl_interpret_reply() should handle -EINVAL from a client;
it means the client doesn't have a lock.

Lustre-change: https://review.whamcloud.com/52498
Lustre-commit: 867ba433e3a0fce4a1b2f8d37a91d550ada41a26

HPE-bug-id: LUS-11633
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: Ie631e04fb3e72900af076cf7f268f20f7b285445
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53116
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/mgc/mgc_request.c
lustre/mgs/mgs_barrier.c
lustre/ptlrpc/import.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/recover.c

index cb467df..27b5819 100644 (file)
@@ -1221,6 +1221,13 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
                 * 1. we previously got disconnected,
                 * 2. value > 1 (at the same node with MGS)
                 * */
+               if (value > 1) {
+                       struct adaptive_timeout *at;
+
+                       at = &imp->imp_at.iat_net_latency;
+                       at_reset(at, INITIAL_CONNECT_TIMEOUT);
+               }
+
                if (imp->imp_state == LUSTRE_IMP_DISCON || value > 1)
                        ptlrpc_reconnect_import(imp);
 
index 445db9f..90fa72d 100644 (file)
@@ -62,7 +62,7 @@ static int mgs_barrier_gl_interpret_reply(const struct lu_env *env,
        ENTRY;
 
        if (rc) {
-               if (rc == -ENODEV) {
+               if (rc == -ENODEV || rc == -EINVAL) {
                        /* The lock is useless, cancel it. */
                        ldlm_lock_cancel(ca->ca_lock);
                        rc = 0;
index c8982cc..f1ac344 100644 (file)
@@ -467,31 +467,12 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
 
 int ptlrpc_reconnect_import(struct obd_import *imp)
 {
-#ifdef CONFIG_LUSTRE_FS_PINGER
-       long timeout_jiffies = cfs_time_seconds(obd_timeout);
-       int rc;
-
-       ptlrpc_pinger_force(imp);
-
-       CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
-              obd2cli_tgt(imp->imp_obd), obd_timeout);
+       int rc = 0;
+       ENTRY;
 
-       rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
-                                    !ptlrpc_import_in_recovery(imp),
-                                    timeout_jiffies);
-       if (rc == 0)
-               rc = -ETIMEDOUT;
-       else
-               rc = 0;
-       CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd),
-              ptlrpc_import_state_name(imp->imp_state));
-       return rc;
-#else
-       ptlrpc_set_import_discon(imp, 0, false);
+       ptlrpc_set_import_discon(imp, 0, true);
        /* Force a new connect attempt */
        ptlrpc_invalidate_import(imp);
-       /* Do a fresh connect next time by zeroing the handle */
-       ptlrpc_disconnect_import(imp, 1);
        /* Wait for all invalidate calls to finish */
        if (atomic_read(&imp->imp_inval_count) > 0) {
                int rc;
@@ -506,12 +487,11 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
 
        /* Allow reconnect attempts */
        imp->imp_obd->obd_no_recov = 0;
-       /* Remove 'invalid' flag */
-       ptlrpc_activate_import(imp, false);
+       imp->imp_remote_handle.cookie = 0;
        /* Attempt a new connect */
-       ptlrpc_recover_import(imp, NULL, 0);
-       return 0;
-#endif
+       rc = ptlrpc_recover_import(imp, NULL, 0);
+
+       RETURN(rc);
 }
 EXPORT_SYMBOL(ptlrpc_reconnect_import);
 
index 9057568..445416a 100644 (file)
@@ -252,10 +252,14 @@ static void ptlrpc_pinger_process_import(struct obd_import *imp,
        if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
                /* wait for a while before trying recovery again */
                imp->imp_next_ping = ptlrpc_next_reconnect(imp);
-               spin_unlock(&imp->imp_lock);
                if (!imp->imp_no_pinger_recover ||
-                   imp->imp_connect_error == -EAGAIN)
-                       ptlrpc_initiate_recovery(imp);
+                   imp->imp_connect_error == -EAGAIN) {
+                       CDEBUG(D_HA, "%s: starting recovery\n",
+                              obd2cli_tgt(imp->imp_obd));
+                       ptlrpc_connect_import_locked(imp);
+               } else {
+                       spin_unlock(&imp->imp_lock);
+               }
        } else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov ||
                   imp_is_deactive(imp)) {
                CDEBUG(D_HA,
index 263d306..567f2c9 100644 (file)
@@ -99,12 +99,12 @@ void ptlrpc_exit_portals(void);
 
 void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
 void lustre_assert_wire_constants(void);
-int ptlrpc_import_in_recovery(struct obd_import *imp);
+bool ptlrpc_import_in_recovery(struct obd_import *imp);
+bool ptlrpc_import_in_recovery_disconnect(struct obd_import *imp, bool d);
 int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt,
                             bool invalid);
 void ptlrpc_handle_failed_import(struct obd_import *imp);
 int ptlrpc_replay_next(struct obd_import *imp, int *inflight);
-void ptlrpc_initiate_recovery(struct obd_import *imp);
 
 int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset);
 int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset);
index d2da4a4..e015cd8 100644 (file)
 #include "ptlrpc_internal.h"
 
 /**
- * Start recovery on disconnected import.
- * This is done by just attempting a connect
- */
-void ptlrpc_initiate_recovery(struct obd_import *imp)
-{
-        ENTRY;
-
-        CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
-        ptlrpc_connect_import(imp);
-
-        EXIT;
-}
-
-/**
  * Identify what request from replay list needs to be replayed next
  * (based on what we have already replayed) and send it to server.
  */
@@ -301,6 +287,23 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
 }
 EXPORT_SYMBOL(ptlrpc_set_import_active);
 
+bool ptlrpc_import_in_recovery_disconnect(struct obd_import *imp,
+                                         bool disconnect_is_recovery)
+{
+       bool in_recovery = true;
+
+       spin_lock(&imp->imp_lock);
+       if (imp->imp_state < LUSTRE_IMP_DISCON ||
+           (!disconnect_is_recovery && imp->imp_state == LUSTRE_IMP_DISCON) ||
+           imp->imp_state >= LUSTRE_IMP_FULL ||
+           imp->imp_obd->obd_no_recov)
+               in_recovery = false;
+       spin_unlock(&imp->imp_lock);
+
+       return in_recovery;
+}
+
+
 /* Attempt to reconnect an import */
 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
 {
@@ -335,30 +338,28 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
                rc = -EALREADY;
        }
        spin_unlock(&imp->imp_lock);
-       if (rc)
-               GOTO(out, rc);
-
-       OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE);
 
-       rc = ptlrpc_connect_import(imp);
-       if (rc)
-               GOTO(out, rc);
+       if (!rc) {
+               OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE);
+               rc = ptlrpc_connect_import(imp);
+       }
 
-       if (!async) {
+       if (!async && (rc == -EALREADY || rc == 0)) {
                long timeout = cfs_time_seconds(obd_timeout);
 
-               CDEBUG(D_HA, "%s: recovery started, waiting %u jiffies\n",
+               CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
                       obd2cli_tgt(imp->imp_obd), obd_timeout);
 
                rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
-                                            !ptlrpc_import_in_recovery(imp),
-                                            timeout);
+                       !ptlrpc_import_in_recovery_disconnect(imp, true),
+                       timeout);
                if (rc == 0)
                        rc = -ETIMEDOUT;
                else
                        rc = 0;
-               CDEBUG(D_HA, "%s: recovery finished\n",
-                      obd2cli_tgt(imp->imp_obd));
+               CDEBUG(D_HA, "%s: recovery finished %s, rc = %d\n",
+                      obd2cli_tgt(imp->imp_obd),
+                      ptlrpc_import_state_name(imp->imp_state), rc);
        }
        EXIT;
 
@@ -367,16 +368,7 @@ out:
 }
 EXPORT_SYMBOL(ptlrpc_recover_import);
 
-int ptlrpc_import_in_recovery(struct obd_import *imp)
+bool ptlrpc_import_in_recovery(struct obd_import *imp)
 {
-       int in_recovery = 1;
-
-       spin_lock(&imp->imp_lock);
-       if (imp->imp_state <= LUSTRE_IMP_DISCON ||
-           imp->imp_state >= LUSTRE_IMP_FULL ||
-           imp->imp_obd->obd_no_recov)
-               in_recovery = 0;
-       spin_unlock(&imp->imp_lock);
-
-       return in_recovery;
+       return ptlrpc_import_in_recovery_disconnect(imp, false);
 }