The mutex lqe_glbl_data_lock held in
qmt_lvbo_free might be the cause of
"sleeping while atomic" if
cfs_hash_for_each_relax is holding a
spinlock at an upper layer:
BUG: sleeping function called from invalid
context at kernel/mutex.c:104
...
Call Trace:
dump_stack+0x19/0x1b
__might_sleep+0xd9/0x100
mutex_lock+0x20/0x40
qmt_lvbo_free+0xc7/0x380 [lquota]
mdt_lvbo_free+0x12d/0x140 [mdt]
ldlm_resource_putref+0x189/0x250 [ptlrpc]
ldlm_lock_put+0x1c8/0x760 [ptlrpc]
ldlm_export_lock_put+0x12/0x20 [ptlrpc]
cfs_hash_for_each_relax+0x3ff/0x450 [libcfs]
cfs_hash_for_each_empty+0x9a/0x210 [libcfs]
ldlm_export_cancel_locks+0xc2/0x1a0 [ptlrpc]
ldlm_bl_thread_main+0x7c8/0xb00 [ptlrpc]
kthread+0xe4/0xf0
ret_from_fork_nospec_begin+0x7/0x21
Move the freeing of the lvbo to a workqueue.
This patch can probably be reverted once
https://review.whamcloud.com/45882
is landed.
Fixes: 1dbcbd70f8 ("LU-15021 quota: protect lqe_glbl_data in lqe")
Signed-off-by: Sergey Cheremencev <scherementsev@ddn.com>
Change-Id: I56aee72a7adbc6514b40689bae30669e607b5ecd
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54107
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
/* the lock to protect lqe_glbl_data */
struct mutex lqe_glbl_data_lock;
struct lqe_glbl_data *lqe_glbl_data;
+ struct work_struct lqe_work; /* workitem to free lvbo */
};
#define lqe_qtype(lqe) (lqe->lqe_site->lqs_qtype)
CDEBUG(D_QUOTA, "%s: initiating QMT shutdown\n", qmt->qmt_svname);
qmt->qmt_stopping = true;
+ if (qmt_lvbo_free_wq) {
+ destroy_workqueue(qmt_lvbo_free_wq);
+ qmt_lvbo_free_wq = NULL;
+ }
+
/* kill pool instances, if any */
qmt_pool_fini(env, qmt);
if (rc)
GOTO(out, rc);
+ qmt_lvbo_free_wq = alloc_workqueue("qmt_lvbo_free", WQ_UNBOUND, 0);
+ if (!qmt_lvbo_free_wq) {
+ rc = -ENOMEM;
+ CERROR("%s: failed to start qmt_lvbo_free workqueue: rc = %d\n",
+ qmt->qmt_svname, rc);
+ goto out;
+ }
+
EXIT;
out:
if (rc)
#include "qmt_internal.h"
+/*
+ * Workqueue handler that frees an lquota entry's global data (lvbo).
+ *
+ * Freeing is deferred to a workqueue because qmt_lvbo_free() can be
+ * reached from ldlm_lock_put() while an upper layer holds a spinlock
+ * (see the cfs_hash_for_each_relax() trace above), and taking the
+ * lqe_glbl_data_lock mutex in that context triggers "sleeping function
+ * called from invalid context".
+ */
+static void qmt_work_lvbo_free(struct work_struct *work)
+{
+	struct lqe_glbl_data *lgd;
+	struct lquota_entry *lqe;
+
+	lqe = container_of(work, struct lquota_entry, lqe_work);
+
+	/* detach lqe_glbl_data under the mutex; free it outside */
+	mutex_lock(&lqe->lqe_glbl_data_lock);
+	lgd = lqe->lqe_glbl_data;
+	lqe->lqe_glbl_data = NULL;
+	mutex_unlock(&lqe->lqe_glbl_data_lock);
+
+	if (likely(lgd != NULL)) {
+		qmt_free_lqe_gd(lgd);
+	} else {
+		/* lvbo was never fully set up for this resource */
+		struct qmt_pool_info *pool;
+
+		pool = (struct qmt_pool_info *)lqe->lqe_site->lqs_parent;
+		CWARN("%s: lvbo for (id=%llx) not fully inited\n",
+		      pool->qpi_qmt->qmt_svname,
+		      lqe->lqe_id.qid_uid);
+	}
+
+	/* release the lqe reference taken when the work was queued */
+	lqe_putref(lqe);
+}
+
/*
* Initialize qmt-specific fields of quota entry.
*
lqe->lqe_revoke_time = 0;
init_rwsem(&lqe->lqe_sem);
mutex_init(&lqe->lqe_glbl_data_lock);
+ INIT_WORK(&lqe->lqe_work, qmt_work_lvbo_free);
}
/* Apply the default quota setting to the specified quota entry
#include "lquota_internal.h"
+extern struct workqueue_struct *qmt_lvbo_free_wq;
+
/*
* The Quota Master Target Device.
* The qmt is responsible for:
#define DEBUG_SUBSYSTEM S_LQUOTA
#include <linux/kthread.h>
+#include <linux/workqueue.h>
#include <lustre_dlm.h>
#include <lustre_swab.h>
#include "qmt_internal.h"
+struct workqueue_struct *qmt_lvbo_free_wq;
+
/* intent policy function called from mdt_intent_opc() when the intent is of
* quota type */
int qmt_intent_policy(const struct lu_env *env, struct lu_device *ld,
if (res->lr_name.name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] != 0) {
struct lquota_entry *lqe = res->lr_lvb_data;
- struct lqe_glbl_data *lgd;
- mutex_lock(&lqe->lqe_glbl_data_lock);
- lgd = lqe->lqe_glbl_data;
- lqe->lqe_glbl_data = NULL;
- mutex_unlock(&lqe->lqe_glbl_data_lock);
- qmt_free_lqe_gd(lgd);
-
- /* release lqe reference */
- lqe_putref(lqe);
+ queue_work(qmt_lvbo_free_wq, &lqe->lqe_work);
} else {
struct dt_object *obj = res->lr_lvb_data;
/* release object reference */