Whamcloud - gitweb
EX-8190 quota: fix race in qmt_seed_glbe
author: Hongchao Zhang <hongchao@whamcloud.com>
Thu, 13 Jul 2023 02:56:58 +0000 (10:56 +0800)
committer: Andreas Dilger <adilger@whamcloud.com>
Thu, 14 Sep 2023 07:25:31 +0000 (07:25 +0000)
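There is a deadlock in qmt_pool_recalc:
The rw_semaphore "qmt_pool_info.qpi_sarr.osts.op_rw_sem" is acquired
in read mode in qmt_pool_recalc and is then acquired in read mode
once more in qmt_seed_glbe_all. The second acquisition will be stuck
if a write-mode lock acquisition is pending in between.

Add a "pool_locked" argument to qmt_seed_glbe_all (and qmt_seed_glbe)
so that the semaphore is not taken again when the caller already
holds it.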

Signed-off-by: Hongchao Zhang <hongchao@whamcloud.com>
Change-Id: Ib7db17700a90feaa9bfe8300bab509567ac1ed21
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52346
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sergey Cheremencev <scherementsev@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/quota/qmt_entry.c
lustre/quota/qmt_handler.c
lustre/quota/qmt_internal.h
lustre/quota/qmt_pool.c

index 13081e7..cfb3baa 100644 (file)
@@ -857,7 +857,7 @@ bool qmt_adjust_edquot_qunit_notify(const struct lu_env *env,
                mutex_lock(&lqe_gl->lqe_glbl_data_lock);
                if (lqe_gl->lqe_glbl_data)
                        qmt_seed_glbe_all(env, lqe_gl->lqe_glbl_data, qunit,
-                                         edquot);
+                                         edquot, false);
                mutex_unlock(&lqe_gl->lqe_glbl_data_lock);
 
                qmt_id_lock_notify(qmt, lqe_gl);
@@ -909,7 +909,7 @@ void qmt_revalidate_lqes(const struct lu_env *env,
 
        mutex_lock(&lqe_gl->lqe_glbl_data_lock);
        if (lqe_gl->lqe_glbl_data)
-               qmt_seed_glbe(env, lqe_gl->lqe_glbl_data);
+               qmt_seed_glbe(env, lqe_gl->lqe_glbl_data, false);
        mutex_unlock(&lqe_gl->lqe_glbl_data_lock);
 
        qmt_id_lock_notify(qmt, lqe_gl);
@@ -1107,7 +1107,7 @@ void qmt_free_lqe_gd(struct lqe_glbl_data *lgd)
 }
 
 void qmt_seed_glbe_all(const struct lu_env *env, struct lqe_glbl_data *lgd,
-                      bool qunit, bool edquot)
+                      bool qunit, bool edquot, bool pool_locked)
 {
        struct rw_semaphore     *sem = NULL;
        struct qmt_pool_info    *qpi;
@@ -1147,8 +1147,11 @@ void qmt_seed_glbe_all(const struct lu_env *env, struct lqe_glbl_data *lgd,
                if (qmt_pool_global(qpi)) {
                        slaves_cnt = lgd->lqeg_num_alloc;
                } else {
-                       sem = qmt_sarr_rwsem(qpi);
-                       down_read(sem);
+                       if (!pool_locked) {
+                               sem = qmt_sarr_rwsem(qpi);
+                               down_read(sem);
+                       }
+
                        slaves_cnt = qmt_sarr_count(qpi);
                }
 
@@ -1212,7 +1215,7 @@ qunit_lbl:
                        }
                }
 
-               if (!qmt_pool_global(qpi))
+               if (!pool_locked && !qmt_pool_global(qpi))
                        up_read(sem);
        }
        /* TODO: only for debug purposes - remove it later */
@@ -1251,7 +1254,7 @@ void qmt_setup_lqe_gd(const struct lu_env *env, struct qmt_device *qmt,
 
        qmt_pool_lqes_lookup_spec(env, qmt, pool_type,
                                  lqe_qtype(lqe), &lqe->lqe_id);
-       qmt_seed_glbe(env, lgd);
+       qmt_seed_glbe(env, lgd, false);
 
        lqe->lqe_glbl_data = lgd;
        qmt_id_lock_notify(qmt, lqe);
index 330f3cc..506e331 100644 (file)
@@ -124,7 +124,7 @@ static void qmt_set_id_notify(const struct lu_env *env, struct qmt_device *qmt,
 
        mutex_lock(&lqe_gl->lqe_glbl_data_lock);
        if (lqe_gl->lqe_glbl_data)
-               qmt_seed_glbe(env, lqe_gl->lqe_glbl_data);
+               qmt_seed_glbe(env, lqe_gl->lqe_glbl_data, false);
        mutex_unlock(&lqe_gl->lqe_glbl_data_lock);
 
        /* Even if slaves haven't enqueued quota lock yet,
index 286edb1..63bbd67 100644 (file)
@@ -501,13 +501,13 @@ void qmt_free_lqe_gd(struct lqe_glbl_data *);
 void qmt_setup_lqe_gd(const struct lu_env *,  struct qmt_device *,
                    struct lquota_entry *, struct lqe_glbl_data *, int);
 #define qmt_seed_glbe_edquot(env, lqeg) \
-               qmt_seed_glbe_all(env, lqeg, false, true)
+               qmt_seed_glbe_all(env, lqeg, false, true, false)
 #define qmt_seed_glbe_qunit(env, lqeg) \
-               qmt_seed_glbe_all(env, lqeg, true, false)
-#define qmt_seed_glbe(env, lqeg) \
-               qmt_seed_glbe_all(env, lqeg, true, true)
+               qmt_seed_glbe_all(env, lqeg, true, false, false)
+#define qmt_seed_glbe(env, lqeg, pool_locked) \
+               qmt_seed_glbe_all(env, lqeg, true, true, pool_locked)
 void qmt_seed_glbe_all(const struct lu_env *, struct lqe_glbl_data *,
-                      bool , bool);
+                      bool, bool, bool);
 
 /* qmt_handler.c */
 int qmt_set_with_lqe(const struct lu_env *env, struct qmt_device *qmt,
index ea23606..7ca5ce9 100644 (file)
@@ -1105,7 +1105,8 @@ static int qmt_site_recalc_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 
                                mutex_lock(&lqeg->lqe_glbl_data_lock);
                                if (lqeg->lqe_glbl_data)
-                                       qmt_seed_glbe(env, lqeg->lqe_glbl_data);
+                                       qmt_seed_glbe(env,
+                                               lqeg->lqe_glbl_data, true);
                                mutex_unlock(&lqeg->lqe_glbl_data_lock);
                                qmt_id_lock_notify(qmt, lqeg);
                        }