From 2cc18ece1e50c760786a13a9dcb5857d7768cb0f Mon Sep 17 00:00:00 2001 From: Sergey Cheremencev Date: Sat, 20 Jan 2024 14:38:38 +0800 Subject: [PATCH] LU-14535 quota: free lvbo in a wq Mutex lqe_glbl_data_lock held in qmt_lvbo_free may cause sleeping while atomic if cfs_hash_for_each_relax is holding a spinlock on an upper layer: BUG: sleeping function called from invalid context at kernel/mutex.c:104 ... Call Trace: dump_stack+0x19/0x1b __might_sleep+0xd9/0x100 mutex_lock+0x20/0x40 qmt_lvbo_free+0xc7/0x380 [lquota] mdt_lvbo_free+0x12d/0x140 [mdt] ldlm_resource_putref+0x189/0x250 [ptlrpc] ldlm_lock_put+0x1c8/0x760 [ptlrpc] ldlm_export_lock_put+0x12/0x20 [ptlrpc] cfs_hash_for_each_relax+0x3ff/0x450 [libcfs] cfs_hash_for_each_empty+0x9a/0x210 [libcfs] ldlm_export_cancel_locks+0xc2/0x1a0 [ptlrpc] ldlm_bl_thread_main+0x7c8/0xb00 [ptlrpc] kthread+0xe4/0xf0 ret_from_fork_nospec_begin+0x7/0x21 Move freeing of lvbo to a workqueue. This patch can probably be reverted once https://review.whamcloud.com/45882 is landed. 
Fixes: 1dbcbd70f8 ("LU-15021 quota: protect lqe_glbl_data in lqe") Signed-off-by: Sergey Cheremencev Change-Id: I56aee72a7adbc6514b40689bae30669e607b5ecd Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54107 Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/quota/lquota_internal.h | 1 + lustre/quota/qmt_dev.c | 13 +++++++++++++ lustre/quota/qmt_entry.c | 26 ++++++++++++++++++++++++++ lustre/quota/qmt_internal.h | 2 ++ lustre/quota/qmt_lock.c | 13 ++++--------- 5 files changed, 46 insertions(+), 9 deletions(-) diff --git a/lustre/quota/lquota_internal.h b/lustre/quota/lquota_internal.h index 645f42e..323cec8 100644 --- a/lustre/quota/lquota_internal.h +++ b/lustre/quota/lquota_internal.h @@ -195,6 +195,7 @@ struct lquota_entry { /* the lock to protect lqe_glbl_data */ struct mutex lqe_glbl_data_lock; struct lqe_glbl_data *lqe_glbl_data; + struct work_struct lqe_work; /* workitem to free lvbo */ }; #define lqe_qtype(lqe) (lqe->lqe_site->lqs_qtype) diff --git a/lustre/quota/qmt_dev.c b/lustre/quota/qmt_dev.c index 763baec..ae93750 100644 --- a/lustre/quota/qmt_dev.c +++ b/lustre/quota/qmt_dev.c @@ -97,6 +97,11 @@ static struct lu_device *qmt_device_fini(const struct lu_env *env, CDEBUG(D_QUOTA, "%s: initiating QMT shutdown\n", qmt->qmt_svname); qmt->qmt_stopping = true; + if (qmt_lvbo_free_wq) { + destroy_workqueue(qmt_lvbo_free_wq); + qmt_lvbo_free_wq = NULL; + } + /* kill pool instances, if any */ qmt_pool_fini(env, qmt); @@ -280,6 +285,14 @@ static int qmt_device_init0(const struct lu_env *env, struct qmt_device *qmt, if (rc) GOTO(out, rc); + qmt_lvbo_free_wq = alloc_workqueue("qmt_lvbo_free", WQ_UNBOUND, 0); + if (!qmt_lvbo_free_wq) { + rc = -ENOMEM; + CERROR("%s: failed to start qmt_lvbo_free workqueue: rc = %d\n", + qmt->qmt_svname, rc); + goto out; + } + EXIT; out: if (rc) diff --git a/lustre/quota/qmt_entry.c b/lustre/quota/qmt_entry.c index 7e67dea..6c37036 100644 
--- a/lustre/quota/qmt_entry.c +++ b/lustre/quota/qmt_entry.c @@ -32,6 +32,31 @@ #include "qmt_internal.h" +static void qmt_work_lvbo_free(struct work_struct *work) +{ + struct lqe_glbl_data *lgd; + struct lquota_entry *lqe; + + lqe = container_of(work, struct lquota_entry, lqe_work); + mutex_lock(&lqe->lqe_glbl_data_lock); + lgd = lqe->lqe_glbl_data; + lqe->lqe_glbl_data = NULL; + mutex_unlock(&lqe->lqe_glbl_data_lock); + qmt_free_lqe_gd(lgd); + + if (unlikely(lgd == NULL)) { + struct qmt_pool_info *pool; + + pool = (struct qmt_pool_info *)lqe->lqe_site->lqs_parent; + CWARN("%s: lvbo for (id=%llx) not fully inited\n", + pool->qpi_qmt->qmt_svname, + lqe->lqe_id.qid_uid); + } + + /* release lqe reference */ + lqe_putref(lqe); +} + /* * Initialize qmt-specific fields of quota entry. * @@ -45,6 +70,7 @@ static void qmt_lqe_init(struct lquota_entry *lqe, void *arg) lqe->lqe_revoke_time = 0; init_rwsem(&lqe->lqe_sem); mutex_init(&lqe->lqe_glbl_data_lock); + INIT_WORK(&lqe->lqe_work, qmt_work_lvbo_free); } /* Apply the default quota setting to the specified quota entry diff --git a/lustre/quota/qmt_internal.h b/lustre/quota/qmt_internal.h index 9aabef1..52714fa 100644 --- a/lustre/quota/qmt_internal.h +++ b/lustre/quota/qmt_internal.h @@ -30,6 +30,8 @@ #include "lquota_internal.h" +extern struct workqueue_struct *qmt_lvbo_free_wq; + /* * The Quota Master Target Device. 
* The qmt is responsible for: diff --git a/lustre/quota/qmt_lock.c b/lustre/quota/qmt_lock.c index 7c78e7e..d847529 100644 --- a/lustre/quota/qmt_lock.c +++ b/lustre/quota/qmt_lock.c @@ -31,6 +31,7 @@ #define DEBUG_SUBSYSTEM S_LQUOTA #include +#include #include #include @@ -38,6 +39,8 @@ #include "qmt_internal.h" +struct workqueue_struct *qmt_lvbo_free_wq; + /* intent policy function called from mdt_intent_opc() when the intent is of * quota type */ int qmt_intent_policy(const struct lu_env *env, struct lu_device *ld, @@ -550,16 +553,8 @@ int qmt_lvbo_free(struct lu_device *ld, struct ldlm_resource *res) if (res->lr_name.name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] != 0) { struct lquota_entry *lqe = res->lr_lvb_data; - struct lqe_glbl_data *lgd; - mutex_lock(&lqe->lqe_glbl_data_lock); - lgd = lqe->lqe_glbl_data; - lqe->lqe_glbl_data = NULL; - mutex_unlock(&lqe->lqe_glbl_data_lock); - qmt_free_lqe_gd(lgd); - - /* release lqe reference */ - lqe_putref(lqe); + queue_work(qmt_lvbo_free_wq, &lqe->lqe_work); } else { struct dt_object *obj = res->lr_lvb_data; /* release object reference */ -- 1.8.3.1