From 188112fc806c8c61d536ba3230b8d50f65e4f8fc Mon Sep 17 00:00:00 2001
From: Sergey Cheremencev
Date: Tue, 13 Apr 2021 02:44:34 +0300
Subject: [PATCH] LU-14300 quota: avoid nested lqe lookup

lqe_locate, called from qmt_pool_lqes_lookup for an lqe that doesn't
have an entry on disk, calls qmt_lqe_set_default. This may call
qmt_set_id_notify->qmt_pool_lqes_spec and rewrite lqes that have
already been added to the qti. Rewritten lqes may trigger an
assertion:

LustreError: 5072:0:(qmt_pool.c:838:qmt_pool_lqes_lookup())
ASSERTION( (((qmt_info(env)->qti_lqes_num) > 16 ?
qmt_info(env)->qti_lqes :
qmt_info(env)->qti_lqes_small)[(qmt_info(env)->qti_glbl_lqe_idx)])->lqe_is_global ) failed:
LustreError: 5072:0:(qmt_pool.c:838:qmt_pool_lqes_lookup()) LBUG
Pid: 5072, comm: mdt_rdpg00_003 3.10.0-957.1.3.x4.1.15.x86_64 #1 SMP
Mon Nov 18 14:47:03 PST 2019
Call Trace:
 [] libcfs_call_trace+0x8c/0xc0 [libcfs]
 [] lbug_with_loc+0x4c/0xa0 [libcfs]
 [] qmt_pool_lqes_lookup+0x798/0x8f0 [lquota]
 [] qmt_intent_policy+0x86e/0xe00 [lquota]
 [] mdt_intent_opc+0x3bd/0xb40 [mdt]
 [] mdt_intent_policy+0x1a4/0x360 [mdt]
 [] ldlm_lock_enqueue+0x3cb/0xad0 [ptlrpc]
 [] ldlm_handle_enqueue0+0xa56/0x1610 [ptlrpc]
 [] tgt_enqueue+0x62/0x210 [ptlrpc]
 [] tgt_request_handle+0x7ea/0x1750 [ptlrpc]

or a deadlock (two identical lqes in the qti_lqes array):

call_rwsem_down_write_failed+0x17/0x30
qti_lqes_write_lock+0xb1/0x1b0 [lquota]
qmt_dqacq0+0x2ee/0x1ac0 [lquota]
qmt_intent_policy+0xbfe/0xe00 [lquota]
mdt_intent_opc+0x3ba/0xb50 [mdt]
mdt_intent_policy+0x1a1/0x360 [mdt]
ldlm_lock_enqueue+0x3d6/0xaa0 [ptlrpc]
ldlm_handle_enqueue0+0xa76/0x1620 [ptlrpc]
tgt_enqueue+0x62/0x210 [ptlrpc]
tgt_request_handle+0x96a/0x1680 [ptlrpc]
kthread+0xd1/0xe0

The patch adds sanity-quota test_73b to check that the issue doesn't
exist anymore.

Change-Id: Ib1ebe82c3b6e819b2538f30af08930060bd659ae
HPE-bug-id: LUS-9902
Signed-off-by: Sergey Cheremencev
Reviewed-on: https://es-gerrit.dev.cray.com/158581
Tested-by: Jenkins Build User
Reviewed-by: Shaun Tancheff
Reviewed-by: Alexander Zarochentsev
Reviewed-on: https://review.whamcloud.com/43326
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Shaun Tancheff
Reviewed-by: Wang Shilong
Reviewed-by: Oleg Drokin
---
 lustre/quota/qmt_entry.c     |  4 ++++
 lustre/quota/qmt_handler.c   |  7 ++++++-
 lustre/quota/qmt_internal.h  |  1 +
 lustre/tests/sanity-quota.sh | 41 +++++++++++++++++++++++++++++++++++++++--
 4 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/lustre/quota/qmt_entry.c b/lustre/quota/qmt_entry.c
index a1eb9c7..c2bb927 100644
--- a/lustre/quota/qmt_entry.c
+++ b/lustre/quota/qmt_entry.c
@@ -942,6 +942,8 @@ void qti_lqes_fini(const struct lu_env *env)
 	if (qti->qti_lqes_num > QMT_MAX_POOL_NUM)
 		OBD_FREE(qti->qti_lqes,
 			 qti->qti_lqes_num * sizeof(struct lquota_entry *));
+
+	qti->qti_lqes_num = 0;
 }
 
 int qti_lqes_min_qunit(const struct lu_env *env)
@@ -1050,6 +1052,8 @@ void qmt_seed_glbe_all(const struct lu_env *env, struct lqe_glbl_data *lgd,
 	int i, j, idx;
 	ENTRY;
 
+	if (!qti_lqes_cnt(env))
+		RETURN_EXIT;
 	/* lqes array is sorted by qunit - the first entry has minimum qunit.
 	 * Thus start seeding global qunit's array beginning from the 1st lqe
 	 * and appropriate pool. If pools overlapped, slaves from this
diff --git a/lustre/quota/qmt_handler.c b/lustre/quota/qmt_handler.c
index 5e7f2f5..166dc30 100644
--- a/lustre/quota/qmt_handler.c
+++ b/lustre/quota/qmt_handler.c
@@ -279,7 +279,12 @@ out_nolock:
 			 * values(see qmt_id_lock_cb for details).*/
 			need_id_notify = true;
 		}
-		if (need_id_notify)
+		/* qti_lqes_inited > 0 means we came here from another
+		 * qmt_pool_lqes_lookup (qmt_dqacq, intent_policy, ...).
+		 * Thus we can't init and add new lqes, so as not to
+		 * overwrite the ones already added.
+		 */
+		if (!qti_lqes_inited(env) && need_id_notify)
 			qmt_set_id_notify(env, qmt, lqe);
 	}
 
diff --git a/lustre/quota/qmt_internal.h b/lustre/quota/qmt_internal.h
index 3ccd4d4..235782c 100644
--- a/lustre/quota/qmt_internal.h
+++ b/lustre/quota/qmt_internal.h
@@ -284,6 +284,7 @@ struct qmt_thread_info *qmt_info(const struct lu_env *env)
 }
 
 #define qti_lqes_num(env)	(qmt_info(env)->qti_lqes_num)
+#define qti_lqes_inited(env)	(qmt_info(env)->qti_lqes_num)
 #define qti_lqes_cnt(env)	(qmt_info(env)->qti_lqes_cnt)
 #define qti_glbl_lqe_idx(env)	(qmt_info(env)->qti_glbl_lqe_idx)
 #define qti_lqes(env)		(qti_lqes_num(env) > QMT_MAX_POOL_NUM ? \
diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh
index d6b3534..ec556d4 100755
--- a/lustre/tests/sanity-quota.sh
+++ b/lustre/tests/sanity-quota.sh
@@ -4852,7 +4852,7 @@ test_72()
 }
 run_test 72 "lfs quota --pool prints only pool's OSTs"
 
-test_73()
+test_73a()
 {
 	local qpool="qpool1"
 
@@ -4864,7 +4864,44 @@ test_73()
 	test_default_quota "-u" "data" "qpool1"
 }
 
-run_test 73 "default limits at OST Pool Quotas"
+run_test 73a "default limits at OST Pool Quotas"
+
+test_73b()
+{
+	local TESTFILE1="$DIR/$tdir/$tfile-1"
+	local limit=20 # 20M
+	local qpool="qpool1"
+
+	mds_supports_qp
+
+	setup_quota_test || error "setup quota failed with $?"
+	stack_trap cleanup_quota_test EXIT
+	quota_init
+	set_ost_qtype $QTYPE || error "enable ost quota failed"
+
+	# pool quotas don't work properly without a global limit
+	$LFS setquota -u $TSTUSR -b 0 -B ${limit}M -i 0 -I 0 $DIR ||
+		error "set global limit failed"
+
+	pool_add $qpool || error "pool_add failed"
+	pool_add_targets $qpool 0 $((OSTCOUNT - 1)) ||
+		error "pool_add_targets failed"
+
+	log "set default quota for $qpool"
+	$LFS setquota -U --pool $qpool -b ${limit}M -B ${limit}M $DIR ||
+		error "set default quota failed"
+
+	log "Write from a user that doesn't have an lqe yet"
+	# Check that it doesn't cause a panic or a deadlock due to
+	# nested lqe lookups that rewrite the 1st lqe in the qti_lqes
+	# array. Have to use RUNAS_ID, as resetquota creates lqes for
+	# TSTUSR/TSTUSR2 at the beginning when it sets limits to 0.
+	runas -u $RUNAS_ID -g $RUNAS_GID $DD of=$TESTFILE1 count=10
+
+	cancel_lru_locks osc
+	sync; sync_all_data || true
+}
+run_test 73b "default OST Pool Quotas limit for new user"
 
 test_74()
 {
-- 
1.8.3.1
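
Note (illustrative, not part of the patch): the fix is a re-entrancy
guard on the per-thread lqes array. A nested lookup must not
re-initialize the array an outer caller is still filling, and releasing
the array must reset the counter the guard reads. Below is a minimal
standalone C sketch of that pattern; the names (thread_info,
entries_inited, entries_add, entries_fini, nested_refill) are
hypothetical stand-ins for the qti_lqes_* helpers, not Lustre code.

#include <stdio.h>

#define MAX_ENTRIES 16

/* Per-thread scratch state; loosely analogous to qmt_thread_info. */
struct thread_info {
	int ti_entries[MAX_ENTRIES];
	int ti_num;	/* > 0 means an outer caller owns the array */
};

/* Mirrors the qti_lqes_inited() check the patch adds: nonzero while
 * the array is populated. */
static int entries_inited(const struct thread_info *ti)
{
	return ti->ti_num;
}

static void entries_add(struct thread_info *ti, int val)
{
	if (ti->ti_num < MAX_ENTRIES)
		ti->ti_entries[ti->ti_num++] = val;
}

/* Like the qmt_entry.c hunk: releasing the array must reset the
 * counter, or a later entries_inited() check would see stale state. */
static void entries_fini(struct thread_info *ti)
{
	ti->ti_num = 0;
}

/* A nested path that wants to refill the array, the way
 * qmt_set_id_notify() can be reached from inside an outer lookup. */
static void nested_refill(struct thread_info *ti)
{
	if (entries_inited(ti))
		return;	/* guard: don't clobber the outer caller's entries */
	entries_add(ti, 99);
}

int main(void)
{
	struct thread_info ti = { .ti_num = 0 };

	entries_add(&ti, 1);	/* outer caller fills the array */
	nested_refill(&ti);	/* guarded: entry 1 survives */
	printf("num=%d first=%d\n", ti.ti_num, ti.ti_entries[0]);
	entries_fini(&ti);
	return 0;
}

Without the guard, the nested call would rewrite entries the outer
caller had already added, which per the commit message surfaces either
as the assertion (the global lqe index no longer points at a global
lqe) or as the deadlock (the same lqe appears twice in qti_lqes and its
rwsem is write-locked twice).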