LU-14535 quota: free lvbo in a wq 07/54107/6
author Sergey Cheremencev <scherementsev@ddn.com>
Sat, 20 Jan 2024 06:38:38 +0000 (14:38 +0800)
committer Oleg Drokin <green@whamcloud.com>
Sat, 23 Mar 2024 05:55:16 +0000 (05:55 +0000)
Taking the mutex lqe_glbl_data_lock in
qmt_lvbo_free may cause sleeping in atomic
context when cfs_hash_for_each_relax holds
a spinlock at an upper layer:

 BUG: sleeping function called from invalid
context at kernel/mutex.c:104
 ...
 Call Trace:
 dump_stack+0x19/0x1b
 __might_sleep+0xd9/0x100
 mutex_lock+0x20/0x40
 qmt_lvbo_free+0xc7/0x380 [lquota]
 mdt_lvbo_free+0x12d/0x140 [mdt]
 ldlm_resource_putref+0x189/0x250 [ptlrpc]
 ldlm_lock_put+0x1c8/0x760 [ptlrpc]
 ldlm_export_lock_put+0x12/0x20 [ptlrpc]
 cfs_hash_for_each_relax+0x3ff/0x450 [libcfs]
 cfs_hash_for_each_empty+0x9a/0x210 [libcfs]
 ldlm_export_cancel_locks+0xc2/0x1a0 [ptlrpc]
 ldlm_bl_thread_main+0x7c8/0xb00 [ptlrpc]
 kthread+0xe4/0xf0
 ret_from_fork_nospec_begin+0x7/0x21
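
The trace above boils down to the following pattern. This is a
minimal illustrative sketch in generic kernel C with hypothetical
lock names; it is not the actual Lustre call chain:

 #include <linux/spinlock.h>
 #include <linux/mutex.h>

 static DEFINE_SPINLOCK(upper_lock);  /* stands in for the upper-layer spinlock */
 static DEFINE_MUTEX(lower_mutex);    /* stands in for lqe_glbl_data_lock */

 static void upper_layer_walk(void)
 {
 	spin_lock(&upper_lock);        /* atomic context starts here */
 	mutex_lock(&lower_mutex);      /* BUG: mutex_lock() may sleep */
 	mutex_unlock(&lower_mutex);
 	spin_unlock(&upper_lock);
 }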

Move the freeing of the lvbo to a workqueue.
This patch can probably be reverted once
https://review.whamcloud.com/45882 lands.
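
For reference, the deferral pattern the patch follows, as a minimal
sketch in generic kernel C with hypothetical names (entry, free_wq)
rather than the qmt structures: the caller, which may be in atomic
context, only queues a work item, and the mutex is taken later in
process context by the work handler.

 #include <linux/workqueue.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>

 struct entry {
 	struct mutex		lock;
 	void			*payload;
 	struct work_struct	free_work;
 };

 static struct workqueue_struct *free_wq;  /* from alloc_workqueue() at setup */

 static void entry_free_work(struct work_struct *work)
 {
 	struct entry *e = container_of(work, struct entry, free_work);
 	void *p;

 	mutex_lock(&e->lock);		/* safe: runs in process context */
 	p = e->payload;
 	e->payload = NULL;
 	mutex_unlock(&e->lock);
 	kfree(p);
 }

 static void entry_init(struct entry *e)
 {
 	mutex_init(&e->lock);
 	INIT_WORK(&e->free_work, entry_free_work);
 }

 /* may run in atomic context: do not sleep, just queue the work item */
 static void entry_release(struct entry *e)
 {
 	queue_work(free_wq, &e->free_work);
 }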

Fixes: 1dbcbd70f8 ("LU-15021 quota: protect lqe_glbl_data in lqe")
Signed-off-by: Sergey Cheremencev <scherementsev@ddn.com>
Change-Id: I56aee72a7adbc6514b40689bae30669e607b5ecd
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54107
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/quota/lquota_internal.h
lustre/quota/qmt_dev.c
lustre/quota/qmt_entry.c
lustre/quota/qmt_internal.h
lustre/quota/qmt_lock.c

index 645f42e..323cec8 100644 (file)
@@ -195,6 +195,7 @@ struct lquota_entry {
        /* the lock to protect lqe_glbl_data */
        struct mutex             lqe_glbl_data_lock;
        struct lqe_glbl_data    *lqe_glbl_data;
+       struct work_struct       lqe_work; /* workitem to free lvbo */
 };
 
 #define lqe_qtype(lqe)         (lqe->lqe_site->lqs_qtype)
index 763baec..ae93750 100644 (file)
@@ -97,6 +97,11 @@ static struct lu_device *qmt_device_fini(const struct lu_env *env,
        CDEBUG(D_QUOTA, "%s: initiating QMT shutdown\n", qmt->qmt_svname);
        qmt->qmt_stopping = true;
 
+       if (qmt_lvbo_free_wq) {
+               destroy_workqueue(qmt_lvbo_free_wq);
+               qmt_lvbo_free_wq = NULL;
+       }
+
        /* kill pool instances, if any */
        qmt_pool_fini(env, qmt);
 
@@ -280,6 +285,14 @@ static int qmt_device_init0(const struct lu_env *env, struct qmt_device *qmt,
        if (rc)
                GOTO(out, rc);
 
+       qmt_lvbo_free_wq = alloc_workqueue("qmt_lvbo_free", WQ_UNBOUND, 0);
+       if (!qmt_lvbo_free_wq) {
+               rc = -ENOMEM;
+               CERROR("%s: failed to start qmt_lvbo_free workqueue: rc = %d\n",
+                      qmt->qmt_svname, rc);
+               goto out;
+       }
+
        EXIT;
 out:
        if (rc)
index 7e67dea..6c37036 100644 (file)
 
 #include "qmt_internal.h"
 
+static void qmt_work_lvbo_free(struct work_struct *work)
+{
+       struct lqe_glbl_data *lgd;
+       struct lquota_entry *lqe;
+
+       lqe = container_of(work, struct lquota_entry, lqe_work);
+       mutex_lock(&lqe->lqe_glbl_data_lock);
+       lgd = lqe->lqe_glbl_data;
+       lqe->lqe_glbl_data = NULL;
+       mutex_unlock(&lqe->lqe_glbl_data_lock);
+       qmt_free_lqe_gd(lgd);
+
+       if (unlikely(lgd == NULL)) {
+               struct qmt_pool_info *pool;
+
+               pool = (struct qmt_pool_info *)lqe->lqe_site->lqs_parent;
+               CWARN("%s: lvbo for (id=%llx) not fully inited\n",
+                     pool->qpi_qmt->qmt_svname,
+                     lqe->lqe_id.qid_uid);
+       }
+
+       /* release lqe reference */
+       lqe_putref(lqe);
+}
+
 /*
  * Initialize qmt-specific fields of quota entry.
  *
@@ -45,6 +70,7 @@ static void qmt_lqe_init(struct lquota_entry *lqe, void *arg)
        lqe->lqe_revoke_time = 0;
        init_rwsem(&lqe->lqe_sem);
        mutex_init(&lqe->lqe_glbl_data_lock);
+       INIT_WORK(&lqe->lqe_work, qmt_work_lvbo_free);
 }
 
 /* Apply the default quota setting to the specified quota entry
index 9aabef1..52714fa 100644 (file)
@@ -30,6 +30,8 @@
 
 #include "lquota_internal.h"
 
+extern struct workqueue_struct *qmt_lvbo_free_wq;
+
 /*
  * The Quota Master Target Device.
  * The qmt is responsible for:
index 7c78e7e..d847529 100644 (file)
@@ -31,6 +31,7 @@
 #define DEBUG_SUBSYSTEM S_LQUOTA
 
 #include <linux/kthread.h>
+#include <linux/workqueue.h>
 
 #include <lustre_dlm.h>
 #include <lustre_swab.h>
@@ -38,6 +39,8 @@
 
 #include "qmt_internal.h"
 
+struct workqueue_struct *qmt_lvbo_free_wq;
+
 /* intent policy function called from mdt_intent_opc() when the intent is of
  * quota type */
 int qmt_intent_policy(const struct lu_env *env, struct lu_device *ld,
@@ -550,16 +553,8 @@ int qmt_lvbo_free(struct lu_device *ld, struct ldlm_resource *res)
 
        if (res->lr_name.name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] != 0) {
                struct lquota_entry *lqe = res->lr_lvb_data;
-               struct lqe_glbl_data *lgd;
 
-               mutex_lock(&lqe->lqe_glbl_data_lock);
-               lgd = lqe->lqe_glbl_data;
-               lqe->lqe_glbl_data = NULL;
-               mutex_unlock(&lqe->lqe_glbl_data_lock);
-               qmt_free_lqe_gd(lgd);
-
-               /* release lqe reference */
-               lqe_putref(lqe);
+               queue_work(qmt_lvbo_free_wq, &lqe->lqe_work);
        } else {
                struct dt_object *obj = res->lr_lvb_data;
                /* release object reference */