Whamcloud - gitweb
LU-17809 osp: make disconnect asynchronous 95/54995/7
authorAlexander Boyko <alexander.boyko@hpe.com>
Sat, 20 Apr 2024 22:02:54 +0000 (18:02 -0400)
committerOleg Drokin <green@whamcloud.com>
Wed, 19 Jun 2024 00:46:17 +0000 (00:46 +0000)
An MDT can have many OSP devices. During umount there is a problem
of cascading timeouts of disconnect requests. This can lead to an
unpredictably long umount time.

This patch adds the ability to disconnect OSP devices in parallel.
During LCFG_PRE_CLEANUP, osp_disconnect() sends the disconnect requests,
and osp_shutdown() later waits for them to complete. The cascading
timeouts are thus reduced to the wait for a single request.

Don't clear the obd_force flag if it was already set by upper layers.

Add replay-single test 201, which simulates delayed OSP disconnects.
Without parallel disconnect, such delays lead to a high cumulative
umount time.

HPE-bug-id: LUS-12251
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: Id788b22c494147bdc7f0d36968629e7b7f660e01
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54995
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
lustre/include/lustre_net.h
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/ptlrpc/import.c
lustre/tests/replay-single.sh

index a0b8b7d..f97d0d9 100644 (file)
@@ -2251,6 +2251,8 @@ void ptlrpc_watchdog_delete(struct delayed_work *work);
 int ptlrpc_connect_import(struct obd_import *imp);
 int ptlrpc_connect_import_locked(struct obd_import *imp);
 int ptlrpc_init_import(struct obd_import *imp);
+int ptlrpc_disconnect_import_async(struct obd_import *imp, int noclose,
+                                  struct completion *a, int *r);
 int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
 int ptlrpc_disconnect_and_idle_import(struct obd_import *imp);
 int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
index 702e030..2906366 100644 (file)
@@ -467,13 +467,17 @@ static int osp_disconnect(struct osp_device *d)
        struct obd_device *obd = d->opd_obd;
        struct obd_import *imp;
        int rc = 0;
+       ENTRY;
 
        imp = obd->u.cli.cl_import;
 
+       CDEBUG(D_INFO, "%s: disconnecting import %px\n", obd->obd_name,
+              imp);
        /* Mark import deactivated now, so we don't try to reconnect if any
         * of the cleanup RPCs fails (e.g. ldlm cancel, etc).  We don't
         * fully deactivate the import, or that would drop all requests. */
        LASSERT(imp != NULL);
+
        spin_lock(&imp->imp_lock);
        imp->imp_deactive = 1;
        spin_unlock(&imp->imp_lock);
@@ -487,15 +491,13 @@ static int osp_disconnect(struct osp_device *d)
 
        /* Send disconnect on healthy import, do force disconnect otherwise */
        spin_lock(&imp->imp_lock);
-       imp->imp_obd->obd_force = imp->imp_state != LUSTRE_IMP_FULL;
+       imp->imp_obd->obd_force |= imp->imp_state != LUSTRE_IMP_FULL;
        spin_unlock(&imp->imp_lock);
 
-       rc = ptlrpc_disconnect_import(imp, 0);
-       if (rc != 0)
-               CERROR("%s: can't disconnect: rc = %d\n", obd->obd_name, rc);
-
-       ptlrpc_invalidate_import(imp);
-
+       init_completion(&d->opd_disconnect_cmplt);
+       d->opd_disconnecting = 1;
+       rc = ptlrpc_disconnect_import_async(imp, 0, &d->opd_disconnect_cmplt,
+                                           &d->opd_disconnect_res);
        RETURN(rc);
 }
 
@@ -615,13 +617,26 @@ static void osp_update_fini(const struct lu_env *env, struct osp_device *osp)
  */
 static int osp_shutdown(const struct lu_env *env, struct osp_device *d)
 {
-       int                      rc = 0;
+       struct obd_device *obd = d->opd_obd;
+       struct obd_import *imp = obd->u.cli.cl_import;
+       int  rc = 0;
        ENTRY;
 
        LASSERT(env);
 
-       rc = osp_disconnect(d);
+       /* Shutdown could be called during fail initialization, LCFG_CLEANUP
+        * without LCFG_PRE_CLEANUP phase, like
+        * lod_add_device()->obd_connect() failure.
+        */
+       if (d->opd_disconnecting) {
+               wait_for_completion(&d->opd_disconnect_cmplt);
+               rc = d->opd_disconnect_res;
 
+               if (rc != 0)
+                       CERROR("%s: can't disconnect: rc = %d\n",
+                              obd->obd_name, rc);
+       }
+       ptlrpc_invalidate_import(imp);
        osp_statfs_fini(d);
 
        if (!d->opd_connect_mdt) {
index cb8a143..4ed3ac1 100644 (file)
@@ -175,6 +175,8 @@ struct osp_device {
        struct obd_device               *opd_obd;
        struct obd_export               *opd_exp;
        struct obd_connect_data         *opd_connect_data;
+       struct completion               opd_disconnect_cmplt;
+       int                             opd_disconnect_res;
 
        /* connection status. */
        unsigned int                     opd_new_connection:1,
@@ -182,7 +184,8 @@ struct osp_device {
                                         opd_imp_connected:1,
                                         opd_imp_active:1,
                                         opd_imp_seen_connected:1,
-                                        opd_connect_mdt:1;
+                                        opd_connect_mdt:1,
+                                        opd_disconnecting:1;
 
        /* whether local recovery is completed:
         * reported via ->ldo_recovery_complete() */
index 02a8adc..1526c81 100644 (file)
@@ -1737,22 +1737,146 @@ static struct ptlrpc_request *ptlrpc_disconnect_prep_req(struct obd_import *imp)
        RETURN(req);
 }
 
-int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
+struct disconnect_async_arg {
+       struct completion       *daa_completion;
+       int                     *daa_result;
+       int                     daa_noclose;
+};
+
+/*
+ * Unlock import.
+ */
+static void ptlrpc_disconnect_import_end(struct obd_import *imp, int noclose)
+{
+       assert_spin_locked(&imp->imp_lock);
+
+       if (noclose)
+               import_set_state_nolock(imp, LUSTRE_IMP_DISCON);
+       else
+               import_set_state_nolock(imp, LUSTRE_IMP_CLOSED);
+       memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+       spin_unlock(&imp->imp_lock);
+
+       obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
+       if (!noclose)
+               obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
+}
+
+static int ptlrpc_disconnect_interpet(const struct lu_env *env,
+                                     struct ptlrpc_request *req, void *args,
+                                     int rc)
+{
+       struct obd_import *imp = req->rq_import;
+       struct disconnect_async_arg *daa = args;
+
+       spin_lock(&imp->imp_lock);
+       ptlrpc_disconnect_import_end(imp, daa->daa_noclose);
+
+       if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN)
+               rc = 0;
+
+       if (daa->daa_result)
+               *daa->daa_result = rc;
+
+       complete(daa->daa_completion);
+
+       return 0;
+}
+
+/**
+ * Sends disconnect request and set import state DISCONNECT/CLOSED.
+ * Produces events IMP_EVENT_DISCON[IMP_EVENT_INACTIVE].
+ * Signals when it is complete.
+ *
+ * \param[in] imp              import
+ * \param[in] noclose          final close import
+ * \param[in] completion       completion to signal disconnect is finished
+ * \param[out] out_res         result of disconnection
+ *
+ * \retval 0                   on seccess
+ * \retval negative            negated errno on error
+ **/
+int ptlrpc_disconnect_import_async(struct obd_import *imp, int noclose,
+                                  struct completion *cmpl, int *out_res)
 {
        struct ptlrpc_request *req;
        int rc = 0;
-
+       struct disconnect_async_arg *daa;
        ENTRY;
 
-       if (imp->imp_obd->obd_force)
-               GOTO(set_state, rc);
+       spin_lock(&imp->imp_lock);
+       /* probably the import has been disconnected already being idle */
+       if (imp->imp_state != LUSTRE_IMP_FULL || imp->imp_obd->obd_force) {
+
+               ptlrpc_disconnect_import_end(imp, noclose);
+
+               if (out_res)
+                       *out_res = 0;
+               complete(cmpl);
+
+               RETURN(0);
+       }
+       spin_unlock(&imp->imp_lock);
+
+       req = ptlrpc_disconnect_prep_req(imp);
+
+       spin_lock(&imp->imp_lock);
+
+       if (IS_ERR(req) || imp->imp_state != LUSTRE_IMP_FULL ||
+           imp->imp_obd->obd_force) {
+
+               if (!IS_ERR(req))
+                       ptlrpc_req_put_with_imp_lock(req);
+
+               ptlrpc_disconnect_import_end(imp, noclose);
+               rc = IS_ERR(req) ? PTR_ERR(req) : 0;
+
+               if (out_res)
+                       *out_res = rc;
+               complete(cmpl);
+
+               RETURN(rc);
+       }
+       import_set_state_nolock(imp, LUSTRE_IMP_CONNECTING);
+       spin_unlock(&imp->imp_lock);
+
+       req->rq_interpret_reply = ptlrpc_disconnect_interpet;
+       daa = ptlrpc_req_async_args(daa, req);
+       daa->daa_completion = cmpl;
+       daa->daa_result = out_res;
+       daa->daa_noclose = noclose;
+
+       ptlrpcd_add_req(req);
+
+       RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_disconnect_import_async);
+
+/**
+ * Sends disconnect request and set import state DISCONNECT/CLOSED.
+ * Produces events IMP_EVENT_DISCON[IMP_EVENT_INACTIVE].
+ *
+ * \param[in] imp              import
+ * \param[in] noclose          final close import
+ *
+ * \retval 0                   on seccess
+ * \retval negative            negated errno on error
+ **/
+int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
+{
+       DECLARE_COMPLETION_ONSTACK(cmpl);
+       int rc;
+       ENTRY;
 
        /* probably the import has been disconnected already being idle */
        spin_lock(&imp->imp_lock);
-       if (imp->imp_state == LUSTRE_IMP_IDLE)
-               GOTO(out, rc);
+       if (imp->imp_state == LUSTRE_IMP_IDLE || imp->imp_obd->obd_force) {
+               ptlrpc_disconnect_import_end(imp, noclose);
+               RETURN(0);
+       }
        spin_unlock(&imp->imp_lock);
 
+
        if (ptlrpc_import_in_recovery(imp)) {
                long timeout_jiffies;
                time64_t timeout;
@@ -1781,37 +1905,10 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
                        rc = -EINTR;
        }
 
-       req = ptlrpc_disconnect_prep_req(imp);
-       if (IS_ERR(req))
-               GOTO(set_state, rc = PTR_ERR(req));
+       rc = ptlrpc_disconnect_import_async(imp, noclose, &cmpl, &rc);
 
-       spin_lock(&imp->imp_lock);
-       if (imp->imp_state != LUSTRE_IMP_FULL) {
-               ptlrpc_req_put_with_imp_lock(req);
-               GOTO(out, rc);
-       }
-       import_set_state_nolock(imp, LUSTRE_IMP_CONNECTING);
-       spin_unlock(&imp->imp_lock);
-
-       rc = ptlrpc_queue_wait(req);
-       ptlrpc_req_put(req);
+       wait_for_completion(&cmpl);
 
-set_state:
-       spin_lock(&imp->imp_lock);
-out:
-       if (noclose)
-               import_set_state_nolock(imp, LUSTRE_IMP_DISCON);
-       else
-               import_set_state_nolock(imp, LUSTRE_IMP_CLOSED);
-       memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
-       spin_unlock(&imp->imp_lock);
-
-       obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
-       if (!noclose)
-               obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
-
-       if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN)
-               rc = 0;
        RETURN(rc);
 }
 EXPORT_SYMBOL(ptlrpc_disconnect_import);
index a9f4200..5fefc33 100755 (executable)
@@ -5309,6 +5309,35 @@ test_200() {
 }
 run_test 200 "Dropping one OBD_PING should not cause disconnect"
 
+test_201() {
+       (( MDS1_VERSION >= $(version_code 2.15.63) )) ||
+               skip "MDS < 2.15.63 doesn't support parallel disconnect"
+       (( MDSCOUNT >= 2 )) || skip_env "needs >= 2 MDTs"
+       (( OSTCOUNT >= 2 )) || skip_env "needs >= 2 OSTs"
+
+       # delay DISCONNECT for 8 seconds, on all OSTs and MDTs
+#define OBD_FAIL_OST_DISCONNECT_DELAY   0x245
+       do_nodes $(comma_list $(mdts_nodes)) "$LCTL set_param \
+                                             fail_loc=0x245 fail_val=8"
+       do_nodes $(comma_list $(osts_nodes)) "$LCTL set_param \
+                                             fail_loc=0x245 fail_val=8"
+
+       local start_time=$SECONDS
+
+       stop mds2
+
+       local duration=$((SECONDS - start_time))
+
+       start mds2 $(mdsdevname 2) $MDS_MOUNT_OPTS ||
+                       error "mount mds2 failed"
+       echo "Umount took $duration seconds"
+
+       #Valid timeout is 8 for MDTs + 8 for OSTs + 4 some for other umount
+       (( duration < 20 )) || error "Cascading timeouts on disconnect"
+}
+run_test 201 "MDT umount cascading disconnects timeouts"
+
+
 complete_test $SECONDS
 check_and_cleanup_lustre
 exit_status