From 3dc261c06434eceee3ba9ef86d1f376954b2d234 Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Thu, 27 Oct 2022 22:54:18 +0300 Subject: [PATCH] LU-16046 ldlm: group lock unlock fix The original LU-9964 fix had a problem: with many pages in memory, grouplock unlock takes 10+ seconds just to discard them. The current patch makes the grouplock unlock thread non-atomic, and instead makes a new grouplock enqueue wait until the previous CBPENDING lock gets destroyed. HPE-bug-id: LUS-10644 Signed-off-by: Vitaly Fertman Change-Id: I7798138b953320c477ce60c4e34eac40ada95a69 Reviewed-on: https://es-gerrit.dev.cray.com/161411 Reviewed-by: Andriy Skulysh Reviewed-by: Alexander Boyko Tested-by: Alexander Lezhoev Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49008 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexander Reviewed-by: Oleg Drokin --- lustre/include/lustre_dlm.h | 1 + lustre/ldlm/ldlm_lock.c | 28 +++++++++++++++++++++++++--- lustre/ldlm/ldlm_request.c | 3 +-- lustre/mdc/mdc_dev.c | 16 +++++++++------- lustre/osc/osc_lock.c | 2 -- lustre/osc/osc_request.c | 14 +++++++++----- 6 files changed, 45 insertions(+), 19 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index ff211a5..e40a79b 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -1015,6 +1015,7 @@ enum ldlm_match_flags { LDLM_MATCH_AST = BIT(1), LDLM_MATCH_AST_ANY = BIT(2), LDLM_MATCH_RIGHT = BIT(3), + LDLM_MATCH_GROUP = BIT(4), }; /** diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index b1b00d3..58ae27f 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -393,6 +393,7 @@ static int ldlm_lock_destroy_internal(struct ldlm_lock *lock) return 0; } ldlm_set_destroyed(lock); + wake_up(&lock->l_waitq); if (lock->l_export && lock->l_export->exp_lock_hash) { /* NB: it's safe to call cfs_hash_del() even lock isn't @@ -1176,10 +1177,12 @@ static bool lock_matches(struct ldlm_lock *lock, struct 
ldlm_match_data *data) * whose parents already hold a lock so forward progress * can still happen. */ if (ldlm_is_cbpending(lock) && - !(data->lmd_flags & LDLM_FL_CBPENDING)) + !(data->lmd_flags & LDLM_FL_CBPENDING) && + !(data->lmd_match & LDLM_MATCH_GROUP)) return false; - if (!(data->lmd_match & LDLM_MATCH_UNREF) && ldlm_is_cbpending(lock) && + if (!(data->lmd_match & (LDLM_MATCH_UNREF | LDLM_MATCH_GROUP)) && + ldlm_is_cbpending(lock) && lock->l_readers == 0 && lock->l_writers == 0) return false; @@ -1242,7 +1245,12 @@ static bool lock_matches(struct ldlm_lock *lock, struct ldlm_match_data *data) return false; matched: - if (data->lmd_flags & LDLM_FL_TEST_LOCK) { + /** + * In case the lock is a CBPENDING grouplock, just pin it and return, + * we need to wait until it gets to DESTROYED. + */ + if ((data->lmd_flags & LDLM_FL_TEST_LOCK) || + (ldlm_is_cbpending(lock) && (data->lmd_match & LDLM_MATCH_GROUP))) { LDLM_LOCK_GET(lock); ldlm_lock_touch_in_lru(lock); } else { @@ -1424,6 +1432,7 @@ enum ldlm_mode ldlm_lock_match_with_skip(struct ldlm_namespace *ns, }; struct ldlm_resource *res; struct ldlm_lock *lock; + struct ldlm_lock *group_lock; int matched; ENTRY; @@ -1444,6 +1453,8 @@ enum ldlm_mode ldlm_lock_match_with_skip(struct ldlm_namespace *ns, RETURN(0); } +repeat: + group_lock = NULL; LDLM_RESOURCE_ADDREF(res); lock_res(res); if (res->lr_type == LDLM_EXTENT) @@ -1453,8 +1464,19 @@ enum ldlm_mode ldlm_lock_match_with_skip(struct ldlm_namespace *ns, if (!lock && !(flags & LDLM_FL_BLOCK_GRANTED)) lock = search_queue(&res->lr_waiting, &data); matched = lock ? 
mode : 0; + + if (lock && ldlm_is_cbpending(lock) && + (data.lmd_match & LDLM_MATCH_GROUP)) + group_lock = lock; unlock_res(res); LDLM_RESOURCE_DELREF(res); + + if (group_lock) { + l_wait_event_abortable(group_lock->l_waitq, + ldlm_is_destroyed(lock)); + LDLM_LOCK_RELEASE(lock); + goto repeat; + } ldlm_resource_putref(res); if (lock) { diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 62595ac..8d609d1 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -1032,8 +1032,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, lock->l_conn_export = exp; lock->l_export = NULL; lock->l_blocking_ast = einfo->ei_cb_bl; - lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL | - LDLM_FL_ATOMIC_CB)); + lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL)); lock->l_activity = ktime_get_real_seconds(); /* lock not sent to server yet */ diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c index 4f9e3cc..339d18a 100644 --- a/lustre/mdc/mdc_dev.c +++ b/lustre/mdc/mdc_dev.c @@ -714,7 +714,8 @@ int mdc_enqueue_send(const struct lu_env *env, struct obd_export *exp, struct ldlm_intent *lit; enum ldlm_mode mode; bool glimpse = *flags & LDLM_FL_HAS_INTENT; - __u64 match_flags = *flags; + __u64 search_flags = *flags; + __u64 match_flags = 0; LIST_HEAD(cancels); int rc, count; int lvb_size; @@ -726,11 +727,14 @@ int mdc_enqueue_send(const struct lu_env *env, struct obd_export *exp, if (einfo->ei_mode == LCK_PR) mode |= LCK_PW; - match_flags |= LDLM_FL_LVB_READY; + search_flags |= LDLM_FL_LVB_READY; if (glimpse) - match_flags |= LDLM_FL_BLOCK_GRANTED; - mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id, - einfo->ei_type, policy, mode, &lockh); + search_flags |= LDLM_FL_BLOCK_GRANTED; + if (mode == LCK_GROUP) + match_flags = LDLM_MATCH_GROUP; + mode = ldlm_lock_match_with_skip(obd->obd_namespace, search_flags, 0, + res_id, einfo->ei_type, policy, mode, + &lockh, match_flags); if (mode) { 
struct ldlm_lock *matched; @@ -973,8 +977,6 @@ int mdc_lock_init(const struct lu_env *env, struct cl_object *obj, ols->ols_flags = flags; ols->ols_speculative = !!(enqflags & CEF_SPECULATIVE); - if (lock->cll_descr.cld_mode == CLM_GROUP) - ols->ols_flags |= LDLM_FL_ATOMIC_CB; if (ols->ols_flags & LDLM_FL_HAS_INTENT) { ols->ols_flags |= LDLM_FL_BLOCK_GRANTED; diff --git a/lustre/osc/osc_lock.c b/lustre/osc/osc_lock.c index 1ebd516..6453b31 100644 --- a/lustre/osc/osc_lock.c +++ b/lustre/osc/osc_lock.c @@ -1233,8 +1233,6 @@ int osc_lock_init(const struct lu_env *env, oscl->ols_flags = osc_enq2ldlm_flags(enqflags); oscl->ols_speculative = !!(enqflags & CEF_SPECULATIVE); - if (lock->cll_descr.cld_mode == CLM_GROUP) - oscl->ols_flags |= LDLM_FL_ATOMIC_CB; if (oscl->ols_flags & LDLM_FL_HAS_INTENT) { oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED; diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 57bc02a..f7323c85 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2994,7 +2994,8 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, struct lustre_handle lockh = { 0 }; struct ptlrpc_request *req = NULL; int intent = *flags & LDLM_FL_HAS_INTENT; - __u64 match_flags = *flags; + __u64 search_flags = *flags; + __u64 match_flags = 0; enum ldlm_mode mode; int rc; ENTRY; @@ -3023,11 +3024,14 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, * matching a lock; speculative lock requests do not need to, * because they will not actually use the lock. 
*/ if (!speculative) - match_flags |= LDLM_FL_LVB_READY; + search_flags |= LDLM_FL_LVB_READY; if (intent != 0) - match_flags |= LDLM_FL_BLOCK_GRANTED; - mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id, - einfo->ei_type, policy, mode, &lockh); + search_flags |= LDLM_FL_BLOCK_GRANTED; + if (mode == LCK_GROUP) + match_flags = LDLM_MATCH_GROUP; + mode = ldlm_lock_match_with_skip(obd->obd_namespace, search_flags, 0, + res_id, einfo->ei_type, policy, mode, + &lockh, match_flags); if (mode) { struct ldlm_lock *matched; -- 1.8.3.1