Whamcloud - gitweb
LU-15283 quota: deadlock between reint & lquota_wb 67/45667/6
authorYang Sheng <ys@whamcloud.com>
Mon, 29 Nov 2021 15:00:03 +0000 (23:00 +0800)
committerOleg Drokin <green@whamcloud.com>
Tue, 11 Jan 2022 06:18:59 +0000 (06:18 +0000)
The reintegration thread may be still running while
the lquota_wb thread process the update record. The
reint thread will hold the dynlock and start a
transaction, lquota_wb thread will start a transacation
then try to grab the dynlock. So we must avoid the
reint & writeback thread running in parallel. This
issue only occur on the ldiskfs case.

COMMAND: "qsd_reint_2.wor"
__schedule
schedule
wait_transaction_locked [jbd2]
add_transaction_credits [jbd2]
start_this_handle [jbd2]
jbd2__journal_start [jbd2]
__ldiskfs_journal_start_sb [ldiskfs]
ldiskfs_release_dquot [ldiskfs]
dqput
dquot_get_dqblk
osd_acct_index_lookup [osd_ldiskfs]
lquota_disk_read [lquota]
qsd_refresh_usage [lquota]
qsd_reconciliation [lquota]
qsd_reint_main [lquota]
kthread
ret_from_fork

COMMAND: "lquota_wb_work-"
__schedule
 schedule
 dynlock_lock [osd_ldiskfs]
 __iam_it_get [osd_ldiskfs]
 iam_it_get [osd_ldiskfs]
 osd_index_iam_lookup [osd_ldiskfs]
 lquota_disk_write [lquota]
 qsd_update_index [lquota]
 qsd_upd_thread [lquota]
 kthread
 ret_from_fork

Signed-off-by: Yang Sheng <ys@whamcloud.com>
Change-Id: I8cdd6227d3b0c5d4f67c432c3129da42a83c0ef2
Reviewed-on: https://review.whamcloud.com/45667
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_quota.h
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-zfs/osd_handler.c
lustre/quota/qsd_internal.h
lustre/quota/qsd_lib.c
lustre/quota/qsd_reint.c
lustre/quota/qsd_writeback.c

index 9844dbf..4b674d8 100644 (file)
@@ -185,7 +185,7 @@ enum osd_quota_local_flags {
 };
 
 struct qsd_instance *qsd_init(const struct lu_env *, char *, struct dt_device *,
-                             struct proc_dir_entry *, bool is_md);
+                             struct proc_dir_entry *, bool is_md, bool excl);
 int qsd_prepare(const struct lu_env *, struct qsd_instance *);
 int qsd_start(const struct lu_env *, struct qsd_instance *);
 void qsd_fini(const struct lu_env *, struct qsd_instance *);
index 55403b4..d206292 100644 (file)
@@ -8207,8 +8207,8 @@ static int osd_device_init0(const struct lu_env *env,
        /* currently it's no need to prepare qsd_instance_md for OST */
        if (!o->od_is_ost) {
                o->od_quota_slave_md = qsd_init(env, o->od_svname,
-                                               &o->od_dt_dev,
-                                               o->od_proc_entry, true);
+                                               &o->od_dt_dev, o->od_proc_entry,
+                                               true, true);
                if (IS_ERR(o->od_quota_slave_md)) {
                        rc = PTR_ERR(o->od_quota_slave_md);
                        o->od_quota_slave_md = NULL;
@@ -8217,7 +8217,7 @@ static int osd_device_init0(const struct lu_env *env,
        }
 
        o->od_quota_slave_dt = qsd_init(env, o->od_svname, &o->od_dt_dev,
-                                       o->od_proc_entry, false);
+                                       o->od_proc_entry, false, true);
 
        if (IS_ERR(o->od_quota_slave_dt)) {
                if (o->od_quota_slave_md != NULL) {
index 8a9d4c2..f4f3ea8 100644 (file)
@@ -1208,8 +1208,8 @@ static int osd_mount(const struct lu_env *env,
        /* currently it's no need to prepare qsd_instance_md for OST */
        if (!o->od_is_ost) {
                o->od_quota_slave_md = qsd_init(env, o->od_svname,
-                                               &o->od_dt_dev,
-                                               o->od_proc_entry, true);
+                                               &o->od_dt_dev, o->od_proc_entry,
+                                               true, false);
                if (IS_ERR(o->od_quota_slave_md)) {
                        rc = PTR_ERR(o->od_quota_slave_md);
                        o->od_quota_slave_md = NULL;
@@ -1218,7 +1218,7 @@ static int osd_mount(const struct lu_env *env,
        }
 
        o->od_quota_slave_dt = qsd_init(env, o->od_svname, &o->od_dt_dev,
-                                    o->od_proc_entry, false);
+                                       o->od_proc_entry, false, false);
 
        if (IS_ERR(o->od_quota_slave_dt)) {
                if (o->od_quota_slave_md != NULL) {
index 7144e2e..b590b84 100644 (file)
@@ -111,7 +111,9 @@ struct qsd_instance {
                                qsd_prepared:1, /* qsd_prepare() successfully
                                                  * called */
                                qsd_exp_valid:1,/* qsd_exp is now valid */
-                               qsd_stopping:1; /* qsd_instance is stopping */
+                               qsd_stopping:1, /* qsd_instance is stopping */
+                               qsd_updating:1, /* qsd is updating record */
+                               qsd_exclusive:1; /* upd exclusive with reint */
 
 };
 
index 03e6f79..6b827cb 100644 (file)
@@ -651,7 +651,8 @@ EXPORT_SYMBOL(qsd_fini);
  */
 struct qsd_instance *qsd_init(const struct lu_env *env, char *svname,
                              struct dt_device *dev,
-                             struct proc_dir_entry *osd_proc, bool is_md)
+                             struct proc_dir_entry *osd_proc,
+                             bool is_md, bool excl)
 {
        struct qsd_thread_info  *qti = qsd_info(env);
        struct qsd_instance     *qsd;
@@ -677,6 +678,8 @@ struct qsd_instance *qsd_init(const struct lu_env *env, char *svname,
        qsd->qsd_prepared = false;
        qsd->qsd_started = false;
        qsd->qsd_is_md = is_md;
+       qsd->qsd_updating = false;
+       qsd->qsd_exclusive = excl;
 
        /* copy service name */
        if (strlcpy(qsd->qsd_svname, svname, sizeof(qsd->qsd_svname))
index ac9b86c..666df96 100644 (file)
@@ -575,13 +575,13 @@ static int qsd_entry_iter_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
        return 0;
 }
 
-static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
+static bool qqi_reint_delayed(struct qsd_qtype_info *qqi)
 {
        struct qsd_instance     *qsd = qqi->qqi_qsd;
        struct qsd_upd_rec      *upd;
        struct lquota_entry     *lqe, *n;
        int                      dqacq = 0;
-       bool                     updates = false;
+       bool                     delay = false;
        ENTRY;
 
        /* any pending quota adjust? */
@@ -594,35 +594,46 @@ static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
        }
        spin_unlock(&qsd->qsd_adjust_lock);
 
+       /* any pending quota request? */
+       cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
+                              &dqacq);
+       if (dqacq) {
+               CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
+                      qsd->qsd_svname, qqi->qqi_qtype);
+               GOTO(out, delay = true);
+       }
+
        /* any pending updates? */
-       read_lock(&qsd->qsd_lock);
+       write_lock(&qsd->qsd_lock);
+
+       /* check if the reintegration has already started or finished */
+       if ((qqi->qqi_glb_uptodate && qqi->qqi_slv_uptodate) ||
+            qqi->qqi_reint || qsd->qsd_stopping || qsd->qsd_updating)
+               GOTO(out_lock, delay = true);
+
+       /* there could be some unfinished global or index entry updates
+        * (very unlikely), to avoid them messing up with the reint
+        * procedure, we just return and try to re-start reint later. */
        list_for_each_entry(upd, &qsd->qsd_upd_list, qur_link) {
                if (upd->qur_qqi == qqi) {
-                       read_unlock(&qsd->qsd_lock);
                        CDEBUG(D_QUOTA, "%s: pending %s updates for type:%d.\n",
                               qsd->qsd_svname,
                               upd->qur_global ? "global" : "slave",
                               qqi->qqi_qtype);
-                       GOTO(out, updates = true);
+                       GOTO(out_lock, delay = true);
                }
        }
-       read_unlock(&qsd->qsd_lock);
+       qqi->qqi_reint = 1;
 
-       /* any pending quota request? */
-       cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
-                              &dqacq);
-       if (dqacq) {
-               CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
-                      qsd->qsd_svname, qqi->qqi_qtype);
-               updates = true;
-       }
        EXIT;
+out_lock:
+       write_unlock(&qsd->qsd_lock);
 out:
-       if (updates)
+       if (delay)
                CERROR("%s: Delaying reintegration for qtype:%d until pending "
                       "updates are flushed.\n",
                       qsd->qsd_svname, qqi->qqi_qtype);
-       return updates;
+       return delay;
 }
 
 int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
@@ -649,28 +660,8 @@ int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
                /* no space accounting support, can't enable enforcement */
                RETURN(0);
 
-       /* check if the reintegration has already started or finished */
-       write_lock(&qsd->qsd_lock);
-
-       if ((qqi->qqi_glb_uptodate && qqi->qqi_slv_uptodate) ||
-            qqi->qqi_reint || qsd->qsd_stopping) {
-               write_unlock(&qsd->qsd_lock);
+       if (qqi_reint_delayed(qqi))
                RETURN(0);
-       }
-       qqi->qqi_reint = 1;
-
-       write_unlock(&qsd->qsd_lock);
-
-       /* there could be some unfinished global or index entry updates
-        * (very unlikely), to avoid them messing up with the reint
-        * procedure, we just return and try to re-start reint later. */
-       if (qsd_pending_updates(qqi)) {
-               write_lock(&qsd->qsd_lock);
-               qqi->qqi_reint = 0;
-               write_unlock(&qsd->qsd_lock);
-               RETURN(0);
-       }
-
 
        OBD_ALLOC_PTR(args);
        if (args == NULL)
index 6584df5..e32045e 100644 (file)
@@ -280,9 +280,18 @@ static int qsd_process_upd(const struct lu_env *env, struct qsd_upd_rec *upd)
 {
        struct lquota_entry     *lqe = upd->qur_lqe;
        struct qsd_qtype_info   *qqi = upd->qur_qqi;
+       struct qsd_instance     *qsd = qqi->qqi_qsd;
        int                      rc;
        ENTRY;
 
+       if (qsd->qsd_exclusive) { /* It could be deadlock running with reint */
+               read_lock(&qsd->qsd_lock);
+               rc = qqi->qqi_reint;
+               read_unlock(&qsd->qsd_lock);
+               if (rc)
+                       return 1;
+       }
+
        if (lqe == NULL) {
                lqe = lqe_locate(env, qqi->qqi_site, &upd->qur_qid);
                if (IS_ERR(lqe))
@@ -298,9 +307,9 @@ static int qsd_process_upd(const struct lu_env *env, struct qsd_upd_rec *upd)
                /* refresh usage */
                qsd_refresh_usage(env, lqe);
 
-               spin_lock(&qqi->qqi_qsd->qsd_adjust_lock);
+               spin_lock(&qsd->qsd_adjust_lock);
                lqe->lqe_adjust_time = 0;
-               spin_unlock(&qqi->qqi_qsd->qsd_adjust_lock);
+               spin_unlock(&qsd->qsd_adjust_lock);
 
                /* Report usage asynchronously */
                rc = qsd_adjust(env, lqe);
@@ -411,6 +420,8 @@ static bool qsd_job_pending(struct qsd_instance *qsd, struct list_head *upd,
                list_splice_init(&qsd->qsd_upd_list, upd);
                job_pending = true;
        }
+       if (qsd->qsd_exclusive)
+               qsd->qsd_updating = job_pending;
 
        for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
                struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype];
@@ -466,15 +477,38 @@ static int qsd_upd_thread(void *_args)
        complete(args->qua_started);
        while (({set_current_state(TASK_IDLE);
                 !kthread_should_stop(); })) {
+               int count = 0;
 
                if (!qsd_job_pending(qsd, &queue, &uptodate))
                        schedule_timeout(cfs_time_seconds(QSD_WB_INTERVAL));
                __set_current_state(TASK_RUNNING);
 
-               list_for_each_entry_safe(upd, n, &queue, qur_link) {
-                       list_del_init(&upd->qur_link);
-                       qsd_process_upd(env, upd);
-                       qsd_upd_free(upd);
+               while (1) {
+                       list_for_each_entry_safe(upd, n, &queue, qur_link) {
+                               if (qsd_process_upd(env, upd) <= 0) {
+                                       list_del_init(&upd->qur_link);
+                                       qsd_upd_free(upd);
+                               }
+                       }
+                       if (list_empty(&queue))
+                               break;
+                       count++;
+                       if (count % 7 == 0) {
+                               n = list_entry(&queue, struct qsd_upd_rec,
+                                              qur_link);
+                               CWARN("%s: The reintegration thread [%d] "
+                                     "blocked more than %ld seconds\n",
+                                     n->qur_qqi->qqi_qsd->qsd_svname,
+                                     n->qur_qqi->qqi_qtype, count *
+                                     cfs_time_seconds(QSD_WB_INTERVAL) / 10);
+                       }
+                       schedule_timeout_interruptible(
+                               cfs_time_seconds(QSD_WB_INTERVAL) / 10);
+               }
+               if (qsd->qsd_exclusive) {
+                       write_lock(&qsd->qsd_lock);
+                       qsd->qsd_updating = false;
+                       write_unlock(&qsd->qsd_lock);
                }
 
                spin_lock(&qsd->qsd_adjust_lock);