Whamcloud - gitweb
LU-16341 quota: fix panic in qmt_site_recalc_cb 18/55518/3
authorSergey Cheremencev <sergey.cheremencev@hpe.com>
Fri, 24 Jun 2022 20:38:29 +0000 (23:38 +0300)
committerOleg Drokin <green@whamcloud.com>
Wed, 26 Jun 2024 18:49:09 +0000 (18:49 +0000)
The panic occurred due to an empty qit_lqes array after
qmt_pool_lqes_lookup_spec. This can happen when the
global lqe is not enforced. Return -ENOENT from
qmt_pool_lqes_lookup_spec if no lqes have been added.

It fixes the following panic:

    BUG: unable to handle NULL pointer dereference at 00000000000000f8
    ...
    RIP: 0010:qmt_site_recalc_cb+0x2ec/0x780 [lquota]
    ...
    cfs_hash_for_each_tight at ffffffffc0c72c81 [libcfs]
    qmt_pool_recalc at ffffffffc142dec7 [lquota]
    kthread at ffffffffb45043a6
    ret_from_fork at ffffffffb4e00255

Add test sanity-quota_14, which reproduces the above panic without the
fix, but skip it on older MDS versions that do not have this fix.

Lustre-change: https://review.whamcloud.com/49241
Lustre-commit: dfe7d2dd2b0d4c0c08faa613f44d2ab1f74c7420

HPE-bug-id: LUS-11007
Change-Id: Ie51396269fae7ed84379bef5fc964cce789eba7c
Signed-off-by: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: jsimmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55518
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Sergey Cheremencev <scherementsev@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/quota/qmt_handler.c
lustre/quota/qmt_pool.c
lustre/tests/sanity-quota.sh

index d2789ce..4f4dd87 100644 (file)
@@ -113,7 +113,7 @@ static void qmt_set_id_notify(const struct lu_env *env, struct qmt_device *qmt,
        lqe_gl = lqe->lqe_is_global ? lqe : NULL;
        rc = qmt_pool_lqes_lookup_spec(env, qmt, lqe_rtype(lqe),
                                       lqe_qtype(lqe), &lqe->lqe_id);
-       if (!qti_lqes_cnt(env))
+       if (rc)
                GOTO(lqes_fini, rc);
 
        if (!lqe_gl && qti_lqes_glbl(env)->lqe_is_global)
index d9dc4a1..ca41c11 100644 (file)
@@ -876,7 +876,6 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
 {
        struct qmt_pool_info    *pos;
        struct lquota_entry     *lqe;
-       int rc = 0;
 
        qti_lqes_init(env);
        down_read(&qmt->qmt_pool_lock);
@@ -895,11 +894,8 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
                /* ENOENT is valid case for lqe from non global pool
                 * that hasn't limits, i.e. not enforced. Continue even
                 * in case of error - we can handle already found lqes */
-               if (IS_ERR_OR_NULL(lqe)) {
-                       /* let know that something went wrong */
-                       rc = lqe ? PTR_ERR(lqe) : -ENOENT;
+               if (IS_ERR(lqe))
                        continue;
-               }
                if (!lqe->lqe_enforced) {
                        /* no settings for this qid_uid */
                        lqe_putref(lqe);
@@ -910,7 +906,7 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
                                 lqe, pos->qpi_name);
        }
        up_read(&qmt->qmt_pool_lock);
-       RETURN(rc);
+       RETURN(qti_lqes_cnt(env) ? 0 : -ENOENT);
 }
 
 /**
index e5443b4..b7e5216 100755 (executable)
@@ -684,6 +684,9 @@ test_1a() {
 run_test 1a "Block hard limit (normal use and out of quota)"
 
 test_1b() {
+       (( MDS1_VERSION >= $(version_code 2.15.5) )) ||
+               skip "Need MDS version at least 2.15.5"
+
        local limit=10 # MB
        local global_limit=20 # MB
        local testfile="$DIR/$tdir/$tfile-0"
@@ -2523,6 +2526,42 @@ test_13(){
 }
 run_test 13 "Cancel per-ID lock in the LRU list"
 
+test_14()
+{
+       (( MDS1_VERSION >= $(version_code 2.15.5) )) ||
+               skip "Need MDS version at least 2.15.5"
+
+       local qpool="qpool1"
+       local tfile1="$DIR/$tdir/$tfile-0"
+
+       mds_supports_qp
+       setup_quota_test || error "setup quota failed with $?"
+       # enable ost quota
+       set_ost_qtype $QTYPE || error "enable ost quota failed"
+
+       $LFS setquota -u $TSTUSR -b 0 -B 100M -i 0 -I 0 $DIR ||
+               error "set user quota failed"
+       pool_add $qpool || error "pool_add failed"
+       pool_add_targets $qpool 0 ||
+               error "pool_add_targets failed"
+       $LFS setstripe -p $qpool $DIR/$tdir || error "cannot set stripe"
+       $LFS setquota -u $TSTUSR -B 30M --pool $qpool $DIR ||
+               error "set user quota failed"
+
+       # don't care about returned value
+       $RUNAS $DD of=$tfile1 count=10 oflag=direct
+
+       echo "Stop ost1..."
+       stop ost1
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR ||
+               error "set user quota failed"
+
+       # no panic after removing OST0000 from the pool
+       pool_remove_target $qpool 0
+       start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || error "start ost1 failed"
+}
+run_test 14 "check panic in qmt_site_recalc_cb"
+
 test_15(){
        local LIMIT=$((24 * 1024 * 1024 * 1024 * 1024)) # 24 TB