Whamcloud - gitweb
LU-8066 obdclass: fix module load locking. 43/36043/9
authorMr. NeilBrown <neilb@suse.de>
Fri, 23 Feb 2024 21:07:35 +0000 (16:07 -0500)
committerOleg Drokin <green@whamcloud.com>
Wed, 13 Mar 2024 03:22:50 +0000 (03:22 +0000)
Safe module loading requires that we try_module_get() in a context
where the module cannot be unloaded, typically protected by
a spinlock that module-unload has to take.
This doesn't currently happen in class_get_type().

As free_module() calls synchronize_rcu() between calling the
exit function and freeing the module, we can use rcu_read_lock()
to check if the exit function has been called, and try_module_get()
if it hasn't.

We must also check the return status of try_module_get().

Linux-commit: 71707c276e0acff160e7f2bd38d5b61eb1f61ab2

Change-Id: Ia551a951db8fd97db51140123d340b1649a159cd
Signed-off-by: Mr. NeilBrown <neilb@suse.de>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/36043
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Timothy Day <timday@amazon.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/obdclass/genops.c

index 0607064..512146e 100644 (file)
@@ -104,6 +104,7 @@ struct obd_type *class_get_type(const char *name)
 {
        struct obd_type *type;
 
+       rcu_read_lock();
        type = class_search_type(name);
 #ifdef HAVE_MODULE_LOADING_SUPPORT
        if (!type) {
@@ -120,17 +121,27 @@ struct obd_type *class_get_type(const char *name)
                        modname = LUSTRE_MDT_NAME;
 #endif /* HAVE_SERVER_SUPPORT */
 
+               rcu_read_unlock();
                if (!request_module("%s", modname)) {
                        CDEBUG(D_INFO, "Loaded module '%s'\n", modname);
-                       type = class_search_type(name);
                } else {
                        LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n",
                                           modname);
                }
+               rcu_read_lock();
+               type = class_search_type(name);
        }
 #endif
        if (type) {
-               if (try_module_get(type->typ_dt_ops->o_owner)) {
+               /*
+                * Holding rcu_read_lock() matches the synchronize_rcu() call
+                * in free_module() and ensures that if type->typ_dt_ops is
+                * not yet NULL, then the module won't be freed until after
+                * we rcu_read_unlock().
+                */
+               const struct obd_ops *dt_ops = READ_ONCE(type->typ_dt_ops);
+
+               if (dt_ops && try_module_get(dt_ops->o_owner)) {
                        atomic_inc(&type->typ_refcnt);
                        /* class_search_type() returned a counted ref, this
                         * count not needed as we could get it via typ_refcnt
@@ -141,6 +152,7 @@ struct obd_type *class_get_type(const char *name)
                        type = NULL;
                }
        }
+       rcu_read_unlock();
        return type;
 }
 EXPORT_SYMBOL(class_get_type);
@@ -306,18 +318,24 @@ int class_unregister_type(const char *name)
        int rc = 0;
 
        ENTRY;
-
        if (!type) {
                CERROR("unknown obd type\n");
                RETURN(-EINVAL);
        }
 
+       /*
+        * Ensure that class_get_type doesn't try to get the module
+        * as it could be freed before the obd_type is released.
+        * synchronize_rcu() will be called before the module
+        * is freed.
+        */
+       type->typ_dt_ops = NULL;
+
        if (atomic_read(&type->typ_refcnt)) {
                CERROR("type %s has refcount (%d)\n", name,
                       atomic_read(&type->typ_refcnt));
                /* This is a bad situation, let's make the best of it */
                /* Remove ops, but leave the name for debugging */
-               type->typ_dt_ops = NULL;
                type->typ_md_ops = NULL;
                GOTO(out_put, rc = -EBUSY);
        }