From dfe7d2dd2b0d4c0c08faa613f44d2ab1f74c7420 Mon Sep 17 00:00:00 2001
From: Sergey Cheremencev
Date: Fri, 24 Jun 2022 23:38:29 +0300
Subject: [PATCH] LU-16341 quota: fix panic in qmt_site_recalc_cb

The panic occurred due to an empty qti_lqes array after
qmt_pool_lqes_lookup_spec(). This is possible when the global
lqe is not enforced. Return -ENOENT from qmt_pool_lqes_lookup_spec()
if no lqes have been added.

This fixes the following panic:

BUG: unable to handle kernel NULL pointer dereference at 00000000000000f8
...
RIP: 0010:qmt_site_recalc_cb+0x2ec/0x780 [lquota]
...
[ffffa5564118fda0] cfs_hash_for_each_tight at ffffffffc0c72c81 [libcfs]
[ffffa5564118fe08] qmt_pool_recalc at ffffffffc142dec7 [lquota]
[ffffa5564118ff10] kthread at ffffffffb45043a6
[ffffa5564118ff50] ret_from_fork at ffffffffb4e00255

Add test sanity-quota_14, which reproduces the above panic without
the fix.

HPE-bug-id: LUS-11007
Change-Id: Ie51396269fae7ed84379bef5fc964cce789eba7c
Signed-off-by: Sergey Cheremencev
Reviewed-on: https://es-gerrit.dev.cray.com/160828
Tested-by: Jenkins Build User
Reviewed-by: Andriy Skulysh
Reviewed-by: Alexander Boyko
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49241
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: jsimmons
Reviewed-by: Oleg Drokin
---
 lustre/quota/qmt_handler.c   |  2 +-
 lustre/quota/qmt_pool.c      |  8 ++------
 lustre/tests/sanity-quota.sh | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/lustre/quota/qmt_handler.c b/lustre/quota/qmt_handler.c
index 3b54192..3446229 100644
--- a/lustre/quota/qmt_handler.c
+++ b/lustre/quota/qmt_handler.c
@@ -113,7 +113,7 @@ static void qmt_set_id_notify(const struct lu_env *env, struct qmt_device *qmt,
 	lqe_gl = lqe->lqe_is_global ? lqe : NULL;
 	rc = qmt_pool_lqes_lookup_spec(env, qmt, lqe_rtype(lqe),
 				       lqe_qtype(lqe), &lqe->lqe_id);
-	if (!qti_lqes_cnt(env))
+	if (rc)
 		GOTO(lqes_fini, rc);
 
 	if (!lqe_gl && qti_lqes_glbl(env)->lqe_is_global)
diff --git a/lustre/quota/qmt_pool.c b/lustre/quota/qmt_pool.c
index dcfb7c7..97aefb1 100644
--- a/lustre/quota/qmt_pool.c
+++ b/lustre/quota/qmt_pool.c
@@ -876,7 +876,6 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
 {
 	struct qmt_pool_info *pos;
 	struct lquota_entry *lqe;
-	int rc = 0;
 
 	qti_lqes_init(env);
 	down_read(&qmt->qmt_pool_lock);
@@ -895,11 +894,8 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
 		/* ENOENT is valid case for lqe from non global pool
 		 * that hasn't limits, i.e. not enforced. Continue even
 		 * in case of error - we can handle already found lqes */
-		if (IS_ERR_OR_NULL(lqe)) {
-			/* let know that something went wrong */
-			rc = lqe ? PTR_ERR(lqe) : -ENOENT;
+		if (IS_ERR(lqe))
 			continue;
-		}
 		if (!lqe->lqe_enforced) {
 			/* no settings for this qid_uid */
 			lqe_putref(lqe);
@@ -910,7 +906,7 @@ int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
 			 lqe, pos->qpi_name);
 	}
 	up_read(&qmt->qmt_pool_lock);
-	RETURN(rc);
+	RETURN(qti_lqes_cnt(env) ? 0 : -ENOENT);
 }
 
 /**
diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh
index 7e388fb..b6dfed9 100755
--- a/lustre/tests/sanity-quota.sh
+++ b/lustre/tests/sanity-quota.sh
@@ -2584,6 +2584,39 @@ test_13(){
 }
 run_test 13 "Cancel per-ID lock in the LRU list"
 
+test_14()
+{
+	local qpool="qpool1"
+	local tfile1="$DIR/$tdir/$tfile-0"
+
+	mds_supports_qp
+	setup_quota_test || error "setup quota failed with $?"
+	# enable ost quota
+	set_ost_qtype $QTYPE || error "enable ost quota failed"
+
+	$LFS setquota -u $TSTUSR -b 0 -B 100M -i 0 -I 0 $DIR ||
+		error "set user quota failed"
+	pool_add $qpool || error "pool_add failed"
+	pool_add_targets $qpool 0 ||
+		error "pool_add_targets failed"
+	$LFS setstripe -p $qpool $DIR/$tdir || error "cannot set stripe"
+	$LFS setquota -u $TSTUSR -B 30M --pool $qpool $DIR ||
+		error "set user quota failed"
+
+	# don't care about returned value
+	$RUNAS $DD of=$tfile1 count=10 oflag=direct
+
+	echo "Stop ost1..."
+	stop ost1
+	$LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR ||
+		error "set user quota failed"
+
+	# no panic after removing OST0000 from the pool
+	pool_remove_target $qpool 0
+	start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || error "start ost1 failed"
+}
+run_test 14 "check panic in qmt_site_recalc_cb"
+
 test_15(){
 	local LIMIT=$((24 * 1024 * 1024 * 1024 * 1024)) # 24 TB
-- 
1.8.3.1
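
Editor's note: for readers outside the Lustre tree, below is a minimal
standalone C sketch of the return-convention change in
qmt_pool_lqes_lookup_spec() above. It is not Lustre code; the types and
names (struct pool, lookup_spec, etc.) are hypothetical stand-ins. The
point it illustrates is the one the patch makes: a per-pool lookup miss
is tolerated and the scan continues, but an empty aggregate result now
returns -ENOENT, so the caller checks rc and can never dereference
entry [0] of an empty array.

#include <errno.h>
#include <stdio.h>

#define NR_POOLS 3

struct pool {
	const char *name;
	int enforced;		/* pool has limits set for this id */
};

static struct pool pools[NR_POOLS] = {
	{ "pool0", 0 },		/* global pool, not enforced */
	{ "pool1", 0 },
	{ "pool2", 0 },
};

/*
 * Collect every pool that enforces the id.  As in the patched
 * qmt_pool_lqes_lookup_spec(), a pool without settings is a valid
 * case and is skipped, but an empty result set is reported as
 * -ENOENT instead of success.
 */
static int lookup_spec(struct pool **found, int *cnt)
{
	int i;

	*cnt = 0;
	for (i = 0; i < NR_POOLS; i++) {
		if (!pools[i].enforced)
			continue;	/* keep scanning other pools */
		found[(*cnt)++] = &pools[i];
	}
	return *cnt > 0 ? 0 : -ENOENT;
}

int main(void)
{
	struct pool *found[NR_POOLS];
	int cnt, rc;

	rc = lookup_spec(found, &cnt);
	if (rc) {
		/* before the fix, a caller that only checked a stale
		 * rc could fall through with cnt == 0 and dereference
		 * found[0] -- the NULL pointer oops in the changelog */
		printf("no enforced pools: rc = %d\n", rc);
		return 0;
	}
	printf("first enforced pool: %s\n", found[0]->name);
	return 0;
}

The design choice mirrors the patch: individual element failures are
not errors (other pools may still contribute entries), so the error is
decided once, from the aggregate count, at the single exit point.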