From 82dad29511b32c844c849ac4ec3ec0214077f125 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Fri, 15 Feb 2019 00:51:00 +0300 Subject: [PATCH] LU-11964 mdc: prevent glimpse lock count grow DOM locks matching tries to ignore locks with LDLM_FL_KMS_IGNORE flag during ldlm_lock_match() but checks that after ldlm_lock_match() call. Therefore if there is any lock with such flag in queue then all other locks after it are ignored and new lock is created causing big amount of locks on single resource in some access patterns. Patch extends lock_matches() function to check flags to exclude and adds ldlm_lock_match_with_skip()p to use that when needed. Corresponding test was added in sanity-dom.sh Test-Parameters: testlist=sanity-dom Lustre-change: https://review.whamcloud.com/34261 Lustre-commit: b915221b6d0f3457fd9dd202a9d14c5f8385bf47 Signed-off-by: Mikhail Pershin Change-Id: Ic45ca10f0e603e79a3a00e4fde13a5fae15ea5fc Reviewed-by: Patrick Farrell Reviewed-by: Lai Siyao Signed-off-by: Minh Diep Reviewed-on: https://review.whamcloud.com/34504 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre_dlm.h | 25 +++++-- lustre/include/obd_support.h | 1 + lustre/ldlm/ldlm_lock.c | 160 ++++++++++++++++++++----------------------- lustre/mdc/mdc_dev.c | 28 +++++--- lustre/tests/sanity-dom.sh | 42 +++++++++--- 5 files changed, 149 insertions(+), 107 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index c5660da..6293d1c 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -1494,10 +1494,27 @@ void ldlm_lock_fail_match_locked(struct ldlm_lock *lock); void ldlm_lock_fail_match(struct ldlm_lock *lock); void ldlm_lock_allow_match(struct ldlm_lock *lock); void ldlm_lock_allow_match_locked(struct ldlm_lock *lock); -enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, - const struct ldlm_res_id *, enum ldlm_type type, - union ldlm_policy_data *, enum ldlm_mode mode, - struct lustre_handle *, int unref); +enum ldlm_mode ldlm_lock_match_with_skip(struct ldlm_namespace *ns, + __u64 flags, __u64 skip_flags, + const struct ldlm_res_id *res_id, + enum ldlm_type type, + union ldlm_policy_data *policy, + enum ldlm_mode mode, + struct lustre_handle *lh, + int unref); +static inline enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, + __u64 flags, + const struct ldlm_res_id *res_id, + enum ldlm_type type, + union ldlm_policy_data *policy, + enum ldlm_mode mode, + struct lustre_handle *lh, + int unref) +{ + return ldlm_lock_match_with_skip(ns, flags, 0, res_id, type, policy, + mode, lh, unref); +} + enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, __u64 *bits); void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode); diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index cacbe00..ec5c90e 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -483,6 +483,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDC_LIGHTWEIGHT 0x805 #define OBD_FAIL_MDC_CLOSE 0x806 #define OBD_FAIL_MDC_MERGE 0x807 +#define OBD_FAIL_MDC_GLIMPSE_DDOS 0x808 #define OBD_FAIL_MGS 0x900 #define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901 diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 5e7afe6..78a11a9 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1146,6 +1146,7 @@ struct lock_match_data { enum ldlm_mode *lmd_mode; union ldlm_policy_data *lmd_policy; __u64 lmd_flags; + __u64 lmd_skip_flags; int lmd_unref; }; @@ -1217,6 +1218,10 @@ static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data) if (!equi(data->lmd_flags & LDLM_FL_LOCAL_ONLY, ldlm_is_local(lock))) return INTERVAL_ITER_CONT; + /* Filter locks by skipping flags */ + if (data->lmd_skip_flags & lock->l_flags) + return INTERVAL_ITER_CONT; + if (data->lmd_flags & LDLM_FL_TEST_LOCK) { LDLM_LOCK_GET(lock); ldlm_lock_touch_in_lru(lock); @@ -1372,24 +1377,27 @@ EXPORT_SYMBOL(ldlm_lock_allow_match); * keep caller code unchanged), the context failure will be discovered by * caller sometime later. */ -enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, - const struct ldlm_res_id *res_id, - enum ldlm_type type, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - struct lustre_handle *lockh, int unref) +enum ldlm_mode ldlm_lock_match_with_skip(struct ldlm_namespace *ns, + __u64 flags, __u64 skip_flags, + const struct ldlm_res_id *res_id, + enum ldlm_type type, + union ldlm_policy_data *policy, + enum ldlm_mode mode, + struct lustre_handle *lockh, int unref) { struct lock_match_data data = { - .lmd_old = NULL, - .lmd_lock = NULL, - .lmd_mode = &mode, - .lmd_policy = policy, - .lmd_flags = flags, - .lmd_unref = unref, + .lmd_old = NULL, + .lmd_lock = NULL, + .lmd_mode = &mode, + .lmd_policy = policy, + .lmd_flags = flags, + .lmd_skip_flags = skip_flags, + .lmd_unref = unref, }; struct ldlm_resource *res; struct ldlm_lock *lock; - int rc = 0; + int matched; + ENTRY; if (ns == NULL) { @@ -1410,98 +1418,78 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, LDLM_RESOURCE_ADDREF(res); lock_res(res); - if (res->lr_type == LDLM_EXTENT) lock = search_itree(res, &data); else lock = search_queue(&res->lr_granted, &data); - if (lock != NULL) - GOTO(out, rc = 1); - if (flags & LDLM_FL_BLOCK_GRANTED) - GOTO(out, rc = 0); - lock = search_queue(&res->lr_waiting, &data); - if (lock != NULL) - GOTO(out, rc = 1); - - EXIT; - out: - unlock_res(res); - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); + if (!lock && !(flags & LDLM_FL_BLOCK_GRANTED)) + lock = search_queue(&res->lr_waiting, &data); + matched = lock ? mode : 0; + unlock_res(res); + LDLM_RESOURCE_DELREF(res); + ldlm_resource_putref(res); - if (lock) { - ldlm_lock2handle(lock, lockh); - if ((flags & LDLM_FL_LVB_READY) && + if (lock) { + ldlm_lock2handle(lock, lockh); + if ((flags & LDLM_FL_LVB_READY) && (!ldlm_is_lvb_ready(lock))) { __u64 wait_flags = LDLM_FL_LVB_READY | LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED; - struct l_wait_info lwi; - if (lock->l_completion_ast) { - int err = lock->l_completion_ast(lock, - LDLM_FL_WAIT_NOREPROC, - NULL); - if (err) { - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); - else - ldlm_lock_decref_internal(lock, - mode); - rc = 0; - goto out2; - } - } + struct l_wait_info lwi; + + if (lock->l_completion_ast) { + int err = lock->l_completion_ast(lock, + LDLM_FL_WAIT_NOREPROC, + NULL); + if (err) + GOTO(out_fail_match, matched = 0); + } - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout), - NULL, LWI_ON_SIGNAL_NOOP, NULL); + lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout), + NULL, LWI_ON_SIGNAL_NOOP, NULL); /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */ - l_wait_event(lock->l_waitq, - lock->l_flags & wait_flags, + l_wait_event(lock->l_waitq, lock->l_flags & wait_flags, &lwi); - if (!ldlm_is_lvb_ready(lock)) { - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); - else - ldlm_lock_decref_internal(lock, mode); - rc = 0; - } - } - } - out2: - if (rc) { - LDLM_DEBUG(lock, "matched (%llu %llu)", - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[2] : policy->l_extent.start, - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[3] : policy->l_extent.end); - - /* check user's security context */ - if (lock->l_conn_export && - sptlrpc_import_check_ctx( - class_exp2cliimp(lock->l_conn_export))) { - if (!(flags & LDLM_FL_TEST_LOCK)) - ldlm_lock_decref_internal(lock, mode); - rc = 0; - } + if (!ldlm_is_lvb_ready(lock)) + GOTO(out_fail_match, matched = 0); + } + + /* check user's security context */ + if (lock->l_conn_export && + sptlrpc_import_check_ctx( + class_exp2cliimp(lock->l_conn_export))) + GOTO(out_fail_match, matched = 0); - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); + LDLM_DEBUG(lock, "matched (%llu %llu)", + (type == LDLM_PLAIN || type == LDLM_IBITS) ? + res_id->name[2] : policy->l_extent.start, + (type == LDLM_PLAIN || type == LDLM_IBITS) ? + res_id->name[3] : policy->l_extent.end); + +out_fail_match: + if (flags & LDLM_FL_TEST_LOCK) + LDLM_LOCK_RELEASE(lock); + else if (!matched) + ldlm_lock_decref_internal(lock, mode); + } - } else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/ - LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res " + /* less verbose for test-only */ + if (!matched && !(flags & LDLM_FL_TEST_LOCK)) { + LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res " "%llu/%llu (%llu %llu)", ns, - type, mode, res_id->name[0], res_id->name[1], - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[2] :policy->l_extent.start, - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[3] : policy->l_extent.end); - } + type, mode, res_id->name[0], res_id->name[1], + (type == LDLM_PLAIN || type == LDLM_IBITS) ? + res_id->name[2] : policy->l_extent.start, + (type == LDLM_PLAIN || type == LDLM_IBITS) ? + res_id->name[3] : policy->l_extent.end); + } if (data.lmd_old != NULL) LDLM_LOCK_PUT(data.lmd_old); - return rc ? mode : 0; + return matched; } -EXPORT_SYMBOL(ldlm_lock_match); +EXPORT_SYMBOL(ldlm_lock_match_with_skip); enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, __u64 *bits) diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c index c06749e..19e850d 100644 --- a/lustre/mdc/mdc_dev.c +++ b/lustre/mdc/mdc_dev.c @@ -692,10 +692,16 @@ int mdc_enqueue_send(const struct lu_env *env, struct obd_export *exp, if (einfo->ei_mode == LCK_PR) mode |= LCK_PW; - if (!glimpse) + if (glimpse) match_flags |= LDLM_FL_BLOCK_GRANTED; - mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id, - einfo->ei_type, policy, mode, &lockh, 0); + /* DOM locking uses LDLM_FL_KMS_IGNORE to mark locks wich have no valid + * LVB information, e.g. canceled locks or locks of just pruned object, + * such locks should be skipped. + */ + mode = ldlm_lock_match_with_skip(obd->obd_namespace, match_flags, + LDLM_FL_KMS_IGNORE, res_id, + einfo->ei_type, policy, mode, + &lockh, 0); if (mode) { struct ldlm_lock *matched; @@ -703,8 +709,16 @@ int mdc_enqueue_send(const struct lu_env *env, struct obd_export *exp, RETURN(ELDLM_OK); matched = ldlm_handle2lock(&lockh); - if (!matched || ldlm_is_kms_ignore(matched)) + /* this shouldn't happen but this check is kept to make + * related test fail if problem occurs + */ + if (unlikely(ldlm_is_kms_ignore(matched))) { + LDLM_ERROR(matched, "matched lock has KMS ignore flag"); goto no_match; + } + + if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GLIMPSE_DDOS)) + ldlm_set_kms_ignore(matched); if (mdc_set_dom_lock_data(env, matched, einfo->ei_cbdata)) { *flags |= LDLM_FL_LVB_READY; @@ -1369,11 +1383,9 @@ static int mdc_object_ast_clear(struct ldlm_lock *lock, void *data) { ENTRY; - if ((lock->l_ast_data == NULL && !ldlm_is_kms_ignore(lock)) || - (lock->l_ast_data == data)) { + if (lock->l_ast_data == data) lock->l_ast_data = NULL; - ldlm_set_kms_ignore(lock); - } + ldlm_set_kms_ignore(lock); RETURN(LDLM_ITER_CONTINUE); } diff --git a/lustre/tests/sanity-dom.sh b/lustre/tests/sanity-dom.sh index 288ea57..dba5302 100644 --- a/lustre/tests/sanity-dom.sh +++ b/lustre/tests/sanity-dom.sh @@ -12,15 +12,6 @@ ALWAYS_EXCEPT="$SANITY_DOM_EXCEPT" [ "$SLOW" = "no" ] && EXCEPT_SLOW="" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then -# bug number for skipped test: - ALWAYS_EXCEPT+="" - if [ $MDSCOUNT -gt 1 ]; then -# bug number for skipped test: - ALWAYS_EXCEPT+="" - fi -fi - LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh @@ -30,6 +21,15 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging +if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then +# bug number for skipped test: + ALWAYS_EXCEPT+="" + if [ $MDSCOUNT -gt 1 ]; then +# bug number for skipped test: + ALWAYS_EXCEPT+="" + fi +fi + [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.10.56) ]] || { skip "Need MDS version at least 2.10.56"; exit 0; } @@ -96,6 +96,30 @@ test_3() { } run_test 3 "Truncate over DoM size on different nodes" +test_4() { + local before=0 + local after=0 + + dd if=/dev/zero of=$DIR1/$tfile bs=2M count=1 + cancel_lru_locks mdc + + #define OBD_FAIL_MDC_GLIMPSE_DDOS 0x808 + $LCTL set_param fail_loc=0x80000808 + before=$(lctl get_param -n ldlm.namespaces.*mdc*.lock_count | + gawk '{cnt=cnt+$1} END{print cnt}') + for ((i=1; i < 100; i++)) + do + tail -n100 $DIR1/$tfile > /dev/null + stat -f $DIR2/$tfile > /dev/null + done + after=$(lctl get_param -n ldlm.namespaces.*mdc*.lock_count | + gawk '{cnt=cnt+$1} END{print cnt}') + [[ $((after - before)) -ge 20 ]] && + error "Too many locks found $((after - before))" + return 0 +} +run_test 4 "DoM: glimpse doesn't produce duplicated locks" + test_fsx() { local file1=$DIR1/$tfile local file2=$DIR2/$tfile -- 1.8.3.1