Whamcloud - gitweb
LU-12780 osp: don't use ptlrpc_thread for lwp_notify_main()
[fs/lustre-release.git] / lustre / osp / lwp_dev.c
index c115ee6..6fdf950 100644 (file)
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2013, Intel Corporation.
+ * Copyright (c) 2013, 2017, Intel Corporation.
  * Use is subject to license terms.
  *
  * lustre/osp/lwp_dev.c
  *
- * Light Weight Proxy, which is just for managing the connection established
- * from OSTs/MDTs to MDT0.
+ * This file provides code related to the Light Weight Proxy (LWP) managing
+ * the connections established from OST to MDT, and MDT to MDT0.
+ *
+ * A LWP connection is used to send quota and FLD query requests. It's not
+ * recoverable, which means the target server doesn't keep an on-disk record
+ * in the last_rcvd file to remember the connection. Once the LWP reconnects
+ * after a server reboot, the server will always regard it as a new connection.
  *
  * Author: <di.wang@intel.com>
  * Author: <yawei.niu@intel.com>
 #define DEBUG_SUBSYSTEM S_OST
 
 #include <obd_class.h>
-#include <lustre_param.h>
+#include <uapi/linux/lustre/lustre_param.h>
 #include <lustre_log.h>
-#include <libcfs/libcfs_string.h>
+#include <linux/kthread.h>
+
+#include "osp_internal.h"
 
 struct lwp_device {
        struct lu_device        lpd_dev;
-       struct obd_device       *lpd_obd;
-       struct obd_uuid         lpd_cluuid;
-       struct obd_export       *lpd_exp;
-       int                     lpd_connects;
+       struct obd_device      *lpd_obd;   /* corresponding OBD device */
+       struct obd_export      *lpd_exp;   /* export of LWP */
+       struct task_struct     *lpd_notify_task; /* notify thread */
+       int                     lpd_connects; /* use count, 0 or 1 */
 };
 
 static inline struct lwp_device *lu2lwp_dev(struct lu_device *d)
@@ -57,6 +64,16 @@ static inline struct lu_device *lwp2lu_dev(struct lwp_device *d)
        return &d->lpd_dev;
 }
 
+/**
+ * Setup LWP device.
+ *
+ * \param[in] env      environment passed by caller
+ * \param[in] lwp      LWP device to be setup
+ * \param[in] nidstring        remote target NID
+ *
+ * \retval             0 on success
+ * \retval             negative number on error
+ */
 static int lwp_setup(const struct lu_env *env, struct lwp_device *lwp,
                     char *nidstring)
 {
@@ -65,12 +82,13 @@ static int lwp_setup(const struct lu_env *env, struct lwp_device *lwp,
        char                    *lwp_name = lwp->lpd_obd->obd_name;
        char                    *server_uuid = NULL;
        char                    *ptr;
-       class_uuid_t             uuid;
        struct obd_import       *imp;
-       int                      len = strlen(lwp_name);
+       int                      len = strlen(lwp_name) + 1;
        int                      rc;
        ENTRY;
 
+       lwp->lpd_notify_task = NULL;
+
        OBD_ALLOC_PTR(bufs);
        if (bufs == NULL)
                RETURN(-ENOMEM);
@@ -89,13 +107,14 @@ static int lwp_setup(const struct lu_env *env, struct lwp_device *lwp,
 
        strncpy(server_uuid, lwp_name, ptr - lwp_name);
        server_uuid[ptr - lwp_name] = '\0';
-       strncat(server_uuid, "_UUID", len - 1);
+       strlcat(server_uuid, "_UUID", len);
        lustre_cfg_bufs_reset(bufs, lwp_name);
        lustre_cfg_bufs_set_string(bufs, 1, server_uuid);
        lustre_cfg_bufs_set_string(bufs, 2, nidstring);
-       lcfg = lustre_cfg_new(LCFG_SETUP, bufs);
-       if (lcfg == NULL)
+       OBD_ALLOC(lcfg, lustre_cfg_len(bufs->lcfg_bufcount, bufs->lcfg_buflen));
+       if (!lcfg)
                GOTO(out, rc = -ENOMEM);
+       lustre_cfg_init(lcfg, LCFG_SETUP, bufs);
 
        rc = client_obd_setup(lwp->lpd_obd, lcfg);
        if (rc != 0) {
@@ -106,24 +125,28 @@ static int lwp_setup(const struct lu_env *env, struct lwp_device *lwp,
 
        imp = lwp->lpd_obd->u.cli.cl_import;
        rc = ptlrpc_init_import(imp);
-       if (rc)
-               GOTO(out, rc);
-
-       ll_generate_random_uuid(uuid);
-       class_uuid_unparse(uuid, &lwp->lpd_cluuid);
 out:
        if (bufs != NULL)
                OBD_FREE_PTR(bufs);
        if (server_uuid != NULL)
                OBD_FREE(server_uuid, len);
-       if (lcfg != NULL)
-               lustre_cfg_free(lcfg);
+       if (lcfg)
+               OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount,
+                                             lcfg->lcfg_buflens));
        if (rc)
                client_obd_cleanup(lwp->lpd_obd);
 
        RETURN(rc);
 }
 
+/**
+ * Disconnect the import from LWP.
+ *
+ * \param[in] d                LWP device to be disconnected
+ *
+ * \retval             0 on success
+ * \retval             negative number on error
+ */
 static int lwp_disconnect(struct lwp_device *d)
 {
        struct obd_import *imp;
@@ -131,9 +154,12 @@ static int lwp_disconnect(struct lwp_device *d)
 
        imp = d->lpd_obd->u.cli.cl_import;
 
-       /* Mark import deactivated now, so we don't try to reconnect if any
+       /*
+        * Mark import deactivated now, so we don't try to reconnect if any
         * of the cleanup RPCs fails (e.g. ldlm cancel, etc).  We don't
-        * fully deactivate the import, or that would drop all requests. */
+        * fully deactivate the import because that would cause all requests
+        * to be dropped.
+        */
        LASSERT(imp != NULL);
        spin_lock(&imp->imp_lock);
        imp->imp_deactive = 1;
@@ -141,9 +167,11 @@ static int lwp_disconnect(struct lwp_device *d)
 
        ptlrpc_deactivate_import(imp);
 
-       /* Some non-replayable imports (MDS's OSCs) are pinged, so just
+       /*
+        * Some non-replayable imports (MDS's OSCs) are pinged, so just
         * delete it regardless.  (It's safe to delete an import that was
-        * never added.) */
+        * never added.)
+        */
        ptlrpc_pinger_del_import(imp);
        rc = ptlrpc_disconnect_import(imp, 0);
        if (rc != 0)
@@ -155,6 +183,18 @@ static int lwp_disconnect(struct lwp_device *d)
        RETURN(rc);
 }
 
+/**
+ * Implementation of lu_device_operations::ldo_process_config.
+ *
+ * Process a Lustre configuration request.
+ *
+ * \param[in] env      environment passed by caller
+ * \param[in] dev      device to be processed
+ * \param[in] lcfg     lustre_cfg, LCFG_PRE_CLEANUP or LCFG_CLEANUP
+ *
+ * \retval             0 on success
+ * \retval             negative number on error
+ */
 static int lwp_process_config(const struct lu_env *env,
                              struct lu_device *dev, struct lustre_cfg *lcfg)
 {
@@ -180,12 +220,24 @@ static int lwp_process_config(const struct lu_env *env,
        RETURN(rc);
 }
 
-const struct lu_device_operations lwp_lu_ops = {
+static const struct lu_device_operations lwp_lu_ops = {
        .ldo_process_config     = lwp_process_config,
 };
 
-int lwp_init0(const struct lu_env *env, struct lwp_device *lwp,
-             struct lu_device_type *ldt, struct lustre_cfg *cfg)
+/**
+ * Initialize LWP device.
+ *
+ * \param[in] env      environment passed by caller
+ * \param[in] lwp      device to be initialized
+ * \param[in] ldt      not used
+ * \param[in] cfg      lustre_cfg contains remote target uuid
+ *
+ * \retval             0 on success
+ * \retval             -ENODEV if the device name cannot be found
+ * \retval             negative numbers on other errors
+ */
+static int lwp_init0(const struct lu_env *env, struct lwp_device *lwp,
+                    struct lu_device_type *ldt, struct lustre_cfg *cfg)
 {
        int                        rc;
        ENTRY;
@@ -207,20 +259,48 @@ int lwp_init0(const struct lu_env *env, struct lwp_device *lwp,
                RETURN(rc);
        }
 
+       rc = lprocfs_obd_setup(lwp->lpd_obd, true);
+       if (rc) {
+               CERROR("%s: lprocfs_obd_setup failed. %d\n",
+                      lwp->lpd_obd->obd_name, rc);
+               ptlrpcd_decref();
+               RETURN(rc);
+       }
+
        rc = lwp_setup(env, lwp, lustre_cfg_string(cfg, 1));
        if (rc) {
                CERROR("%s: setup lwp failed. %d\n",
                       lwp->lpd_obd->obd_name, rc);
+               lprocfs_obd_cleanup(lwp->lpd_obd);
+               ptlrpcd_decref();
+               RETURN(rc);
+       }
+
+       rc = sptlrpc_lprocfs_cliobd_attach(lwp->lpd_obd);
+       if (rc) {
+               CERROR("%s: sptlrpc_lprocfs_cliobd_attached failed. %d\n",
+                      lwp->lpd_obd->obd_name, rc);
                ptlrpcd_decref();
                RETURN(rc);
        }
 
-       if (lprocfs_seq_obd_setup(lwp->lpd_obd) == 0)
-               ptlrpc_lprocfs_register_obd(lwp->lpd_obd);
+       ptlrpc_lprocfs_register_obd(lwp->lpd_obd);
 
        RETURN(0);
 }
 
+/**
+ * Implementation of lu_device_type_operations::ldto_device_free.
+ *
+ * Free a LWP device.
+ *
+ * \param[in] env      environment passed by caller
+ * \param[in] lu       device to be freed
+ *
+ * \retval             NULL to indicate that this is the bottom device
+ *                     of the stack and there are no more devices
+ *                     below this one to be cleaned up.
+ */
 static struct lu_device *lwp_device_free(const struct lu_env *env,
                                         struct lu_device *lu)
 {
@@ -236,56 +316,76 @@ static struct lu_device *lwp_device_free(const struct lu_env *env,
        RETURN(NULL);
 }
 
+/**
+ * Implementation of lu_device_type_operations::ldto_device_alloc.
+ *
+ * Allocate a LWP device.
+ *
+ * \param[in] env      environment passed by caller
+ * \param[in] ldt      device type whose name is LUSTRE_LWP_NAME
+ * \param[in] lcfg     lustre_cfg contains remote target UUID
+ *
+ * \retval             pointer of allocated LWP device on success
+ * \retval             ERR_PTR(errno) on error
+ */
 static struct lu_device *lwp_device_alloc(const struct lu_env *env,
-                                         struct lu_device_type *t,
+                                         struct lu_device_type *ldt,
                                          struct lustre_cfg *lcfg)
 {
        struct lwp_device *lwp;
-       struct lu_device  *l;
+       struct lu_device  *ludev;
 
        OBD_ALLOC_PTR(lwp);
        if (lwp == NULL) {
-               l = ERR_PTR(-ENOMEM);
+               ludev = ERR_PTR(-ENOMEM);
        } else {
                int rc;
 
-               l = lwp2lu_dev(lwp);
-               lu_device_init(&lwp->lpd_dev, t);
-               rc = lwp_init0(env, lwp, t, lcfg);
+               ludev = lwp2lu_dev(lwp);
+               lu_device_init(&lwp->lpd_dev, ldt);
+               rc = lwp_init0(env, lwp, ldt, lcfg);
                if (rc != 0) {
-                       lwp_device_free(env, l);
-                       l = ERR_PTR(rc);
+                       lwp_device_free(env, ludev);
+                       ludev = ERR_PTR(rc);
                }
        }
-       return l;
+       return ludev;
 }
 
 
+/**
+ * Implementation of lu_device_type_operations::ldto_device_fini.
+ *
+ * Finalize LWP device.
+ *
+ * \param[in] env      environment passed by caller
+ * \param[in] ludev    device to be finalized
+ *
+ * \retval             NULL on success
+ */
 static struct lu_device *lwp_device_fini(const struct lu_env *env,
-                                        struct lu_device *d)
+                                        struct lu_device *ludev)
 {
-       struct lwp_device *m = lu2lwp_dev(d);
-       struct obd_import *imp;
-       int                rc;
+       struct lwp_device       *m = lu2lwp_dev(ludev);
+       struct task_struct      *task = NULL;
+       int                      rc;
        ENTRY;
 
+       task = xchg(&m->lpd_notify_task, NULL);
+       if (task) {
+               kthread_stop(task);
+               class_export_put(m->lpd_exp);
+       }
+
        if (m->lpd_exp != NULL)
                class_disconnect(m->lpd_exp);
 
-       imp = m->lpd_obd->u.cli.cl_import;
-
-       if (imp->imp_rq_pool) {
-               ptlrpc_free_rq_pool(imp->imp_rq_pool);
-               imp->imp_rq_pool = NULL;
-       }
-
        LASSERT(m->lpd_obd);
-       ptlrpc_lprocfs_unregister_obd(m->lpd_obd);
-       lprocfs_obd_cleanup(m->lpd_obd);
-
        rc = client_obd_cleanup(m->lpd_obd);
        LASSERTF(rc == 0, "error %d\n", rc);
 
+       ptlrpc_lprocfs_unregister_obd(m->lpd_obd);
+
        ptlrpcd_decref();
 
        RETURN(NULL);
@@ -304,6 +404,80 @@ struct lu_device_type lwp_device_type = {
        .ldt_ctx_tags = LCT_MD_THREAD
 };
 
+/**
+ * Body of the LWP notify thread.
+ *
+ * Calls lustre_notify_lwp_list() for the export and then settles, via an
+ * atomic exchange on lpd_notify_task, who drops the export reference taken
+ * when the thread was started.
+ *
+ * \param[in] args     the obd_export the thread was started for
+ *
+ * \retval             0 always
+ */
+static int lwp_notify_main(void *args)
+{
+       struct obd_export       *exp = args;
+       struct lwp_device       *lwp;
+
+       LASSERT(exp != NULL);
+       lwp = lu2lwp_dev(exp->exp_obd->obd_lu_dev);
+
+       lustre_notify_lwp_list(exp);
+
+       if (xchg(&lwp->lpd_notify_task, NULL) != NULL) {
+               /* Nobody claimed the task pointer before us, so the
+                * export reference is ours to release.
+                */
+               class_export_put(exp);
+               return 0;
+       }
+
+       /* lwp_device_fini() already took the task pointer and is waiting
+        * for this thread.  The wakeup comes directly from kthread_stop(),
+        * not from wake_up_var(), and lwp_device_fini() will call
+        * class_export_put() itself.
+        */
+       wait_var_event(lwp, kthread_should_stop());
+
+       return 0;
+}
+
+/**
+ * Start a kernel thread that runs lwp_notify_main() for an export.
+ *
+ * Some notify callbacks may cause deadlock in failover
+ * scenario, so we have to start thread to run callbacks
+ * asynchronously. See LU-6273.
+ *
+ * Failure to create the thread is only logged; nothing is returned to
+ * the caller.
+ *
+ * \param[in] exp      export of the LWP whose users are to be notified
+ */
+static void lwp_notify_users(struct obd_export *exp)
+{
+       struct lwp_device       *lwp;
+       struct task_struct      *task;
+       char                     name[MTI_NAME_MAXLEN];
+
+       LASSERT(exp != NULL);
+       lwp = lu2lwp_dev(exp->exp_obd->obd_lu_dev);
+
+       snprintf(name, sizeof(name), "lwp_notify_%s",
+                exp->exp_obd->obd_name);
+
+       /* Notify happens only on LWP setup, so there shouldn't
+        * be notify thread running */
+       if (lwp->lpd_notify_task) {
+               CERROR("LWP notify thread: %s wasn't stopped\n", name);
+               return;
+       }
+
+       task = kthread_create(lwp_notify_main, exp, name);
+       if (IS_ERR(task)) {
+               /* PTR_ERR() yields a signed long (negative errno), so it
+                * must be printed with %ld rather than %lu.
+                */
+               CERROR("Failed to start LWP notify thread:%s. %ld\n",
+                      name, PTR_ERR(task));
+               return;
+       }
+
+       /* Publish the task and pin the export before the thread is allowed
+        * to run; the reference is dropped by whichever of
+        * lwp_notify_main() or lwp_device_fini() takes the non-NULL
+        * lpd_notify_task pointer.
+        */
+       lwp->lpd_notify_task = task;
+       class_export_get(exp);
+       wake_up_process(task);
+}
+
+/**
+ * Implementation of OBD device operations obd_ops::o_connect.
+ *
+ * Create export for LWP, and connect to target server.
+ *
+ * \param[in] env      the environment passed by caller
+ * \param[out] exp     export for the connection to be established
+ * \param[in] obd      OBD device to perform the connect on
+ * \param[in] cluuid   UUID of the OBD device
+ * \param[in] data     connect data containing compatibility flags
+ * \param[in] localdata        not used
+ *
+ * \retval             0 on success
+ * \retval             negative number on error
+ */
 static int lwp_obd_connect(const struct lu_env *env, struct obd_export **exp,
                           struct obd_device *obd, struct obd_uuid *cluuid,
                           struct obd_connect_data *data, void *localdata)
@@ -328,7 +502,6 @@ static int lwp_obd_connect(const struct lu_env *env, struct obd_export **exp,
        *exp = class_conn2export(&conn);
        lwp->lpd_exp = *exp;
 
-       /* Why should there ever be more than 1 connect? */
        lwp->lpd_connects++;
        LASSERT(lwp->lpd_connects == 1);
 
@@ -345,6 +518,7 @@ static int lwp_obd_connect(const struct lu_env *env, struct obd_export **exp,
 
        ocd->ocd_version = LUSTRE_VERSION_CODE;
        imp->imp_connect_flags_orig = ocd->ocd_connect_flags;
+       imp->imp_connect_flags2_orig = ocd->ocd_connect_flags2;
 
        rc = ptlrpc_connect_import(imp);
        if (rc != 0) {
@@ -366,9 +540,23 @@ out_dis:
 out_sem:
        up_write(&cli->cl_sem);
 
+       if (rc == 0)
+               lwp_notify_users(*exp);
+
        return rc;
 }
 
+/**
+ * Implementation of OBD device operations obd_ops::o_disconnect.
+ *
+ * Release export for the LWP. Only disconnect the underlying layers
+ * on the final disconnect.
+ *
+ * \param[in] exp      the export to perform disconnect on
+ *
+ * \retval             0 on success
+ * \retval             negative number on error
+ */
 static int lwp_obd_disconnect(struct obd_export *exp)
 {
        struct obd_device *obd = exp->exp_obd;
@@ -376,7 +564,6 @@ static int lwp_obd_disconnect(struct obd_export *exp)
        int                rc;
        ENTRY;
 
-       /* Only disconnect the underlying layers on the final disconnect. */
        LASSERT(lwp->lpd_connects == 1);
        lwp->lpd_connects--;
 
@@ -388,6 +575,16 @@ static int lwp_obd_disconnect(struct obd_export *exp)
        RETURN(rc);
 }
 
+/**
+ * Handle import events for the LWP device.
+ *
+ * \param[in] obd      OBD device associated with the import
+ * \param[in] imp      the import which event happened on
+ * \param[in] event    event type
+ *
+ * \retval             0 on success
+ * \retval             negative number on error
+ */
 static int lwp_import_event(struct obd_device *obd, struct obd_import *imp,
                            enum obd_import_event event)
 {
@@ -410,11 +607,29 @@ static int lwp_import_event(struct obd_device *obd, struct obd_import *imp,
        return 0;
 }
 
-struct obd_ops lwp_obd_device_ops = {
+/**
+ * Set-info handler for the LWP device.
+ *
+ * Only KEY_SPTLRPC_CONF is handled: it calls sptlrpc_conf_client_adapt()
+ * on the export's OBD device.  Any other key is logged and rejected.
+ *
+ * \param[in] env      environment passed by caller (not used here)
+ * \param[in] exp      export the request was issued on
+ * \param[in] keylen   length of \a key
+ * \param[in] key      key naming the information to set
+ * \param[in] vallen   length of \a val (not used here)
+ * \param[in] val      value to set (not used here)
+ * \param[in] set      request set (not used here)
+ *
+ * \retval             0 if the key is KEY_SPTLRPC_CONF
+ * \retval             -EINVAL for any other key
+ */
+static int lwp_set_info_async(const struct lu_env *env,
+                             struct obd_export *exp,
+                             u32 keylen, void *key,
+                             u32 vallen, void *val,
+                             struct ptlrpc_request_set *set)
+{
+       ENTRY;
+
+       if (!KEY_IS(KEY_SPTLRPC_CONF)) {
+               CERROR("Unknown key %s\n", (char *)key);
+               RETURN(-EINVAL);
+       }
+
+       sptlrpc_conf_client_adapt(exp->exp_obd);
+       RETURN(0);
+}
+
+const struct obd_ops lwp_obd_device_ops = {
        .o_owner        = THIS_MODULE,
        .o_add_conn     = client_import_add_conn,
        .o_del_conn     = client_import_del_conn,
        .o_connect      = lwp_obd_connect,
        .o_disconnect   = lwp_obd_disconnect,
        .o_import_event = lwp_import_event,
+       .o_set_info_async   = lwp_set_info_async,
 };