LU-14300 quota: avoid nested lqe lookup 26/43326/5
author Sergey Cheremencev <sergey.cheremencev@hpe.com>
Mon, 12 Apr 2021 23:44:34 +0000 (02:44 +0300)
committer Oleg Drokin <green@whamcloud.com>
Thu, 22 Jul 2021 01:50:09 +0000 (01:50 +0000)
lqe_locate(), called from qmt_pool_lqes_lookup() for an lqe
that has no entry on disk, calls qmt_lqe_set_default().
This may call qmt_set_id_notify->qmt_pool_lqes_spec and
overwrite lqes that were already added to the qti. The
overwritten lqes may trigger an assertion:

LustreError: 5072:0:(qmt_pool.c:838:qmt_pool_lqes_lookup()) ASSERTION( (((qmt_info(env)->qti_lqes_num) > 16 ? qmt_info(env)->qti_lqes : qmt_info(env)->qti_lqes_small)[(qmt_info(env)->qti_glbl_lqe_idx)])->lqe_is_global ) failed:
LustreError: 5072:0:(qmt_pool.c:838:qmt_pool_lqes_lookup()) LBUG
Pid: 5072, comm: mdt_rdpg00_003 3.10.0-957.1.3957.1.3.x4.1.15.x86_64 #1 SMP Mon Nov 18 14:47:03 PST 2019
Call Trace:
 [<ffffffffc046f62c>] libcfs_call_trace+0x8c/0xc0 [libcfs]
 [<ffffffffc046f94c>] lbug_with_loc+0x4c/0xa0 [libcfs]
 [<ffffffffc0e4ae38>] qmt_pool_lqes_lookup+0x798/0x8f0 [lquota]
 [<ffffffffc0e3b0ce>] qmt_intent_policy+0x86e/0xe00 [lquota]
 [<ffffffffc109d53d>] mdt_intent_opc+0x3bd/0xb40 [mdt]
 [<ffffffffc10a5134>] mdt_intent_policy+0x1a4/0x360 [mdt]
 [<ffffffffc0a7bedb>] ldlm_lock_enqueue+0x3cb/0xad0 [ptlrpc]
 [<ffffffffc0aa4a46>] ldlm_handle_enqueue0+0xa56/0x1610 [ptlrpc]
 [<ffffffffc0b304b2>] tgt_enqueue+0x62/0x210 [ptlrpc]
 [<ffffffffc0b3753a>] tgt_request_handle+0x7ea/0x1750 [ptlrpc]

or a deadlock (two identical lqes in the qti_lqes array):

 call_rwsem_down_write_failed+0x17/0x30
 qti_lqes_write_lock+0xb1/0x1b0 [lquota]
 qmt_dqacq0+0x2ee/0x1ac0 [lquota]
 qmt_intent_policy+0xbfe/0xe00 [lquota]
 mdt_intent_opc+0x3ba/0xb50 [mdt]
 mdt_intent_policy+0x1a1/0x360 [mdt]
 ldlm_lock_enqueue+0x3d6/0xaa0 [ptlrpc]
 ldlm_handle_enqueue0+0xa76/0x1620 [ptlrpc]
 tgt_enqueue+0x62/0x210 [ptlrpc]
 tgt_request_handle+0x96a/0x1680 [ptlrpc]
 kthread+0xd1/0xe0

The patch adds sanity-quota test_73b to check that the issue
no longer occurs.
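
To make the failure mode concrete, below is a toy sketch of the pattern
(the toy_* names and the pthread lock are illustrative stand-ins, not
the actual Lustre structures or API): an outer lookup fills a per-thread
array of lqes; if a nested lookup appends to the same array, one lqe can
occupy two slots, and write-locking every slot then takes the same lock
twice from one thread, which is the qti_lqes_write_lock() hang above.

  /* Toy model only: toy_qti/toy_lqe stand in for qmt_thread_info's
   * qti_lqes array and struct lquota_entry; this is not Lustre code. */
  #include <pthread.h>

  #define TOY_MAX_LQES 16

  struct toy_lqe {
          pthread_rwlock_t lq_lock;       /* stand-in for the lqe rwsem */
  };

  struct toy_qti {
          int              qti_num;       /* stand-in for qti_lqes_num */
          struct toy_lqe  *qti_lqes[TOY_MAX_LQES];
  };

  /* The outer lookup appends each matching lqe; a nested lookup that
   * re-enters with the same per-thread qti can append a duplicate. */
  static void toy_lqes_add(struct toy_qti *qti, struct toy_lqe *lqe)
  {
          qti->qti_lqes[qti->qti_num++] = lqe;
  }

  /* With a duplicate in the array, write-locking every slot takes the
   * same rwlock twice from one thread and may deadlock -- the
   * qti_lqes_write_lock() trace above. */
  static void toy_lqes_write_lock(struct toy_qti *qti)
  {
          for (int i = 0; i < qti->qti_num; i++)
                  pthread_rwlock_wrlock(&qti->qti_lqes[i]->lq_lock);
  }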

Change-Id: Ib1ebe82c3b6e819b2538f30af08930060bd659ae
HPE-bug-id: LUS-9902
Signed-off-by: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Reviewed-on: https://es-gerrit.dev.cray.com/158581
Tested-by: Jenkins Build User <nssreleng@cray.com>
Reviewed-by: Shaun Tancheff <stancheff@cray.com>
Reviewed-by: Alexander Zarochentsev <c17826@cray.com>
Reviewed-on: https://review.whamcloud.com/43326
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Wang Shilong <wshilong@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/quota/qmt_entry.c
lustre/quota/qmt_handler.c
lustre/quota/qmt_internal.h
lustre/tests/sanity-quota.sh

diff --git a/lustre/quota/qmt_entry.c b/lustre/quota/qmt_entry.c
index a1eb9c7..c2bb927 100644
@@ -942,6 +942,8 @@ void qti_lqes_fini(const struct lu_env *env)
        if (qti->qti_lqes_num > QMT_MAX_POOL_NUM)
                OBD_FREE(qti->qti_lqes,
                         qti->qti_lqes_num * sizeof(struct lquota_entry *));
+
+       qti->qti_lqes_num = 0;
 }
 
 int qti_lqes_min_qunit(const struct lu_env *env)
@@ -1050,6 +1052,8 @@ void qmt_seed_glbe_all(const struct lu_env *env, struct lqe_glbl_data *lgd,
        int                      i, j, idx;
        ENTRY;
 
+       if (!qti_lqes_cnt(env))
+               RETURN_EXIT;
        /* lqes array is sorted by qunit - the first entry has minimum qunit.
         * Thus start seeding global qunit's array beginning from the 1st lqe
         * and appropriate pool. If pools overlapped, slaves from this
diff --git a/lustre/quota/qmt_handler.c b/lustre/quota/qmt_handler.c
index 5e7f2f5..166dc30 100644
@@ -279,7 +279,12 @@ out_nolock:
                         * values(see qmt_id_lock_cb for details).*/
                        need_id_notify = true;
                }
-               if (need_id_notify)
+               /* qti_lqes_inited > 0 means we got here from another
+                * qmt_pool_lqes_lookup (qmt_dqacq, intent_policy, ...).
+                * Thus we must not init and add new lqes, so as not to
+                * overwrite the ones already added.
+                */
+               if (!qti_lqes_inited(env) && need_id_notify)
                        qmt_set_id_notify(env, qmt, lqe);
        }
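
Continuing the toy sketch from the commit message above (again,
illustrative names only, not the real Lustre API), the guard added here
boils down to: a non-zero lqe count means an outer lookup still owns the
per-thread array, so the nested path skips the step that would refill
it, and the count is cleared when the request finishes (the
qti_lqes_fini() hunk above).

  /* Toy continuation: toy_lqes_inited()/toy_lqes_fini() mirror the
   * intent of qti_lqes_inited()/qti_lqes_fini(); not Lustre code. */
  static int toy_lqes_inited(const struct toy_qti *qti)
  {
          return qti->qti_num;            /* non-zero: array already in use */
  }

  static void toy_lqes_fini(struct toy_qti *qti)
  {
          qti->qti_num = 0;               /* next request starts clean */
  }

  static void toy_set_default(struct toy_qti *qti, struct toy_lqe *lqe,
                              int need_id_notify)
  {
          /* mirrors: if (!qti_lqes_inited(env) && need_id_notify)
           *                  qmt_set_id_notify(env, qmt, lqe);       */
          if (!toy_lqes_inited(qti) && need_id_notify)
                  toy_lqes_add(qti, lqe); /* nested fill only when safe */
  }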
 
diff --git a/lustre/quota/qmt_internal.h b/lustre/quota/qmt_internal.h
index 3ccd4d4..235782c 100644
@@ -284,6 +284,7 @@ struct qmt_thread_info *qmt_info(const struct lu_env *env)
 }
 
 #define qti_lqes_num(env)      (qmt_info(env)->qti_lqes_num)
+#define qti_lqes_inited(env)   (qmt_info(env)->qti_lqes_num)
 #define qti_lqes_cnt(env)      (qmt_info(env)->qti_lqes_cnt)
 #define qti_glbl_lqe_idx(env)  (qmt_info(env)->qti_glbl_lqe_idx)
 #define qti_lqes(env)          (qti_lqes_num(env) > QMT_MAX_POOL_NUM ? \
diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh
index d6b3534..ec556d4 100755
@@ -4852,7 +4852,7 @@ test_72()
 }
 run_test 72 "lfs quota --pool prints only pool's OSTs"
 
-test_73()
+test_73a()
 {
        local qpool="qpool1"
 
@@ -4864,7 +4864,44 @@ test_73()
 
        test_default_quota "-u" "data" "qpool1"
 }
-run_test 73 "default limits at OST Pool Quotas"
+run_test 73a "default limits at OST Pool Quotas"
+
+test_73b()
+{
+       local TESTFILE1="$DIR/$tdir/$tfile-1"
+       local limit=20 #20M
+       local qpool="qpool1"
+
+       mds_supports_qp
+
+       setup_quota_test || error "setup quota failed with $?"
+       stack_trap cleanup_quota_test EXIT
+       quota_init
+       set_ost_qtype $QTYPE || error "enable ost quota failed"
+
+       # pool quotas don't work properly without global limit
+       $LFS setquota -u $TSTUSR -b 0 -B ${limit}M -i 0 -I 0 $DIR ||
+               error "set global limit failed"
+
+       pool_add $qpool || error "pool_add failed"
+       pool_add_targets $qpool 0 $((OSTCOUNT - 1)) ||
+               error "pool_add_targets failed"
+
+       log "set default quota for $qpool"
+       $LFS setquota -U --pool $qpool -b ${limit}M -B ${limit}M $DIR ||
+               error "set default quota failed"
+
+       log "Write from a user that has no lqe"
+       # Check that this doesn't cause a panic or a deadlock due to
+       # nested lqe lookups that overwrite the 1st lqe in the qti_lqes
+       # array. Have to use RUNAS_ID, as resetquota creates lqes for
+       # TSTUSR/TSTUSR2 at the start when it sets limits to 0.
+       runas -u $RUNAS_ID -g $RUNAS_GID $DD of=$TESTFILE1 count=10
+
+       cancel_lru_locks osc
+       sync; sync_all_data || true
+}
+run_test 73b "default OST Pool Quotas limit for new user"
 
 test_74()
 {