From 76325fbb0d60b3b6524f101d5c1faa5e004610ec Mon Sep 17 00:00:00 2001 From: "Mr. NeilBrown" Date: Fri, 23 Feb 2024 16:07:35 -0500 Subject: [PATCH] LU-8066 obdclass: fix module load locking. Safe module loading requires that we try_module_get() in a context where the module cannot be unloaded, typically protected by a spinlock that module-unload has to take. This doesn't currently happen in class_get_type(). As free_module() calls synchronize_rcu() between calling the exit function and freeing the module, we can use rcu_read_lock() to check if the exit function has been called, and try_module_get() if it hasn't. We must also check the return status of try_module_get(). Linux-commit: 71707c276e0acff160e7f2bd38d5b61eb1f61ab2 Change-Id: Ia551a951db8fd97db51140123d340b1649a159cd Signed-off-by: Mr. NeilBrown Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/36043 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Timothy Day Reviewed-by: Oleg Drokin --- lustre/obdclass/genops.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 0607064..512146e 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -104,6 +104,7 @@ struct obd_type *class_get_type(const char *name) { struct obd_type *type; + rcu_read_lock(); type = class_search_type(name); #ifdef HAVE_MODULE_LOADING_SUPPORT if (!type) { @@ -120,17 +121,27 @@ struct obd_type *class_get_type(const char *name) modname = LUSTRE_MDT_NAME; #endif /* HAVE_SERVER_SUPPORT */ + rcu_read_unlock(); if (!request_module("%s", modname)) { CDEBUG(D_INFO, "Loaded module '%s'\n", modname); - type = class_search_type(name); } else { LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n", modname); } + rcu_read_lock(); + type = class_search_type(name); } #endif if (type) { - if (try_module_get(type->typ_dt_ops->o_owner)) { + /* + * Holding rcu_read_lock() matches the synchronize_rcu() call + * in free_module() and ensures that if type->typ_dt_ops is + * not yet NULL, then the module won't be freed until after + * we rcu_read_unlock(). + */ + const struct obd_ops *dt_ops = READ_ONCE(type->typ_dt_ops); + + if (dt_ops && try_module_get(dt_ops->o_owner)) { atomic_inc(&type->typ_refcnt); /* class_search_type() returned a counted ref, this * count not needed as we could get it via typ_refcnt @@ -141,6 +152,7 @@ struct obd_type *class_get_type(const char *name) type = NULL; } } + rcu_read_unlock(); return type; } EXPORT_SYMBOL(class_get_type); @@ -306,18 +318,24 @@ int class_unregister_type(const char *name) int rc = 0; ENTRY; - if (!type) { CERROR("unknown obd type\n"); RETURN(-EINVAL); } + /* + * Ensure that class_get_type doesn't try to get the module + * as it could be freed before the obd_type is released. + * synchronize_rcu() will be called before the module + * is freed. + */ + type->typ_dt_ops = NULL; + if (atomic_read(&type->typ_refcnt)) { CERROR("type %s has refcount (%d)\n", name, atomic_read(&type->typ_refcnt)); /* This is a bad situation, let's make the best of it */ /* Remove ops, but leave the name for debugging */ - type->typ_dt_ops = NULL; type->typ_md_ops = NULL; GOTO(out_put, rc = -EBUSY); } -- 1.8.3.1