Whamcloud - gitweb
LU-16341 quota: fix panic in qmt_site_recalc_cb 41/49241/4
author: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Fri, 24 Jun 2022 20:38:29 +0000 (23:38 +0300)
committer: Oleg Drokin <green@whamcloud.com>
Mon, 13 Mar 2023 06:06:24 +0000 (06:06 +0000)
The panic occurred because the qti_lqes array was empty after
qmt_pool_lqes_lookup_spec. This can happen when the
global lqe is not enforced. Return -ENOENT from
qmt_pool_lqes_lookup_spec if no lqes have been added.
This fixes the following panic:
BUG: unable to handle kernel NULL pointer dereference at 00000000000000f8
...
RIP: 0010:qmt_site_recalc_cb+0x2ec/0x780 [lquota]
...
[ffffa5564118fda0] cfs_hash_for_each_tight at ffffffffc0c72c81 [libcfs]
[ffffa5564118fe08] qmt_pool_recalc at ffffffffc142dec7 [lquota]
[ffffa5564118ff10] kthread at ffffffffb45043a6
[ffffa5564118ff50] ret_from_fork at ffffffffb4e00255

Add test sanity-quota_14, which reproduces the above panic
without the fix.

HPE-bug-id: LUS-11007
Change-Id: Ie51396269fae7ed84379bef5fc964cce789eba7c
Signed-off-by: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Reviewed-on: https://es-gerrit.dev.cray.com/160828
Tested-by: Jenkins Build User <nssreleng@cray.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49241
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: jsimmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/quota/qmt_handler.c
lustre/quota/qmt_pool.c
lustre/tests/sanity-quota.sh

index 3b54192..3446229 100644 (file)
@@ -113,7 +113,7 @@ static void qmt_set_id_notify(const struct lu_env *env, struct qmt_device *qmt,
        lqe_gl = lqe->lqe_is_global ? lqe : NULL;
        rc = qmt_pool_lqes_lookup_spec(env, qmt, lqe_rtype(lqe),
                                       lqe_qtype(lqe), &lqe->lqe_id);
-       if (!qti_lqes_cnt(env))
+       if (rc)
                GOTO(lqes_fini, rc);
 
        if (!lqe_gl && qti_lqes_glbl(env)->lqe_is_global)
index dcfb7c7..97aefb1 100644 (file)
@@ -876,7 +876,6 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
 {
        struct qmt_pool_info    *pos;
        struct lquota_entry     *lqe;
-       int rc = 0;
 
        qti_lqes_init(env);
        down_read(&qmt->qmt_pool_lock);
@@ -895,11 +894,8 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
                /* ENOENT is valid case for lqe from non global pool
                 * that hasn't limits, i.e. not enforced. Continue even
                 * in case of error - we can handle already found lqes */
-               if (IS_ERR_OR_NULL(lqe)) {
-                       /* let know that something went wrong */
-                       rc = lqe ? PTR_ERR(lqe) : -ENOENT;
+               if (IS_ERR(lqe))
                        continue;
-               }
                if (!lqe->lqe_enforced) {
                        /* no settings for this qid_uid */
                        lqe_putref(lqe);
@@ -910,7 +906,7 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
                                 lqe, pos->qpi_name);
        }
        up_read(&qmt->qmt_pool_lock);
-       RETURN(rc);
+       RETURN(qti_lqes_cnt(env) ? 0 : -ENOENT);
 }
 
 /**
index 7e388fb..b6dfed9 100755 (executable)
@@ -2584,6 +2584,39 @@ test_13(){
 }
 run_test 13 "Cancel per-ID lock in the LRU list"
 
+test_14()
+{
+       local qpool="qpool1"
+       local tfile1="$DIR/$tdir/$tfile-0"
+
+       mds_supports_qp
+       setup_quota_test || error "setup quota failed with $?"
+       # enable ost quota
+       set_ost_qtype $QTYPE || error "enable ost quota failed"
+
+       $LFS setquota -u $TSTUSR -b 0 -B 100M -i 0 -I 0 $DIR ||
+               error "set user quota failed"
+       pool_add $qpool || error "pool_add failed"
+       pool_add_targets $qpool 0 ||
+               error "pool_add_targets failed"
+       $LFS setstripe -p $qpool $DIR/$tdir || error "cannot set stripe"
+       $LFS setquota -u $TSTUSR -B 30M --pool $qpool $DIR ||
+               error "set user quota failed"
+
+       # don't care about returned value
+       $RUNAS $DD of=$tfile1 count=10 oflag=direct
+
+       echo "Stop ost1..."
+       stop ost1
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR ||
+               error "set user quota failed"
+
+       # no panic after removing OST0000 from the pool
+       pool_remove_target $qpool 0
+       start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || error "start ost1 failed"
+}
+run_test 14 "check panic in qmt_site_recalc_cb"
+
 test_15(){
        local LIMIT=$((24 * 1024 * 1024 * 1024 * 1024)) # 24 TB