From: Andriy Skulysh Date: Mon, 16 Dec 2019 20:09:37 +0000 (+0200) Subject: LU-13456 ldlm: fix reprocessing of locks with more bits X-Git-Tag: 2.14.56~103 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=05e6ccd344e7eba44e43230fa2fa0a1b3b6115c4;hp=f3314706b4e5c21f14908650decd92a30fdc1db9 LU-13456 ldlm: fix reprocessing of locks with more bits While reprocessing, the set of queues to check should be extended with the inodebits of the lock that was just granted. ldlm_reprocess_all() should be called when a BL AST race occurs. Change-Id: Ifd232062068481c1c62fa2f2a14c7778d4402260 Fixes: 2250e072c3785 ("LU-12017 ldlm: DoM truncate deadlock") HPE-bug-id: LUS-8224 Signed-off-by: Andriy Skulysh Reviewed-by: Alexander Boyko Reviewed-by: Vitaly Fertman Reviewed-on: https://review.whamcloud.com/38244 Reviewed-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo Reviewed-by: Vitaly Fertman Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index ea5efc9..ee4dea1 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -1361,7 +1361,7 @@ typedef int (*ldlm_reprocessing_policy)(struct ldlm_resource *res, struct list_head *queue, struct list_head *work_list, enum ldlm_process_intention intention, - struct ldlm_lock *hint); + __u64 hint); /** * Return values for lock iterators. 
@@ -1612,7 +1612,7 @@ enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, __u64 *bits); void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode); void ldlm_lock_cancel(struct ldlm_lock *lock); -void ldlm_reprocess_all(struct ldlm_resource *res, struct ldlm_lock *hint); +void ldlm_reprocess_all(struct ldlm_resource *res, __u64 hint); void ldlm_reprocess_recovery_done(struct ldlm_namespace *ns); void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh); void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index 634bb38..76b2487 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -604,7 +604,7 @@ reprocess: restart: ldlm_reprocess_queue(res, &res->lr_waiting, &rpc_list, - LDLM_PROCESS_RESCAN, NULL); + LDLM_PROCESS_RESCAN, 0); unlock_res_and_lock(req); rc = ldlm_run_ast_work(ns, &rpc_list, diff --git a/lustre/ldlm/ldlm_inodebits.c b/lustre/ldlm/ldlm_inodebits.c index 0de9a20..41257a9 100644 --- a/lustre/ldlm/ldlm_inodebits.c +++ b/lustre/ldlm/ldlm_inodebits.c @@ -68,7 +68,7 @@ int ldlm_reprocess_inodebits_queue(struct ldlm_resource *res, struct list_head *queue, struct list_head *work_list, enum ldlm_process_intention intention, - struct ldlm_lock *hint) + __u64 mask) { __u64 flags; int rc = LDLM_ITER_CONTINUE; @@ -87,11 +87,15 @@ int ldlm_reprocess_inodebits_queue(struct ldlm_resource *res, if (intention == LDLM_PROCESS_RECOVERY) return ldlm_reprocess_queue(res, queue, work_list, intention, - NULL); + 0); restart: CDEBUG(D_DLMTRACE, "--- Reprocess resource "DLDLMRES" (%p)\n", PLDLMRES(res), res); + if (mask) + CDEBUG(D_DLMTRACE, "Hint %llx\n", mask); + else + mask = MDS_INODELOCK_FULL; for (i = 0; i < MDS_INODELOCK_NUMBITS; i++) { LIST_HEAD(rpc_list); @@ -99,9 +103,7 @@ restart: struct ldlm_lock *pending; struct ldlm_ibits_node *node; - if (list_empty(head)) - continue; - if (hint && 
!(hint->l_policy_data.l_inodebits.bits & BIT(i))) + if (list_empty(head) || !(mask & (1 << i))) continue; node = list_entry(head->next, struct ldlm_ibits_node, @@ -115,8 +117,8 @@ restart: &err, &rpc_list); if (ldlm_is_granted(pending)) { list_splice(&rpc_list, work_list); - /* Try to grant more locks from current queue */ - i--; + mask |= pending->l_policy_data.l_inodebits.bits; + i = ffs(pending->l_policy_data.l_inodebits.bits) - 2; } else { list_splice(&rpc_list, &bl_ast_list); } @@ -129,8 +131,10 @@ restart: LDLM_WORK_BL_AST); lock_res(res); - if (rc == -ERESTART) + if (rc == -ERESTART) { + mask = 0; GOTO(restart, rc); + } } if (!list_empty(&bl_ast_list)) diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index 33b7b8e..517ab60 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -143,8 +143,7 @@ void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new, #ifdef HAVE_SERVER_SUPPORT int ldlm_reprocess_queue(struct ldlm_resource *res, struct list_head *queue, struct list_head *work_list, - enum ldlm_process_intention intention, - struct ldlm_lock *hint); + enum ldlm_process_intention intention, __u64 hint); int ldlm_handle_conflict_lock(struct ldlm_lock *lock, __u64 *flags, struct list_head *rpc_list); void ldlm_discard_bl_list(struct list_head *bl_list); @@ -195,7 +194,7 @@ int ldlm_reprocess_inodebits_queue(struct ldlm_resource *res, struct list_head *queue, struct list_head *work_list, enum ldlm_process_intention intention, - struct ldlm_lock *hint); + __u64 hint); /* ldlm_extent.c */ int ldlm_process_extent_lock(struct ldlm_lock *lock, __u64 *flags, enum ldlm_process_intention intention, diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 8cd8b82..d9b1ca2 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1935,8 +1935,7 @@ out: */ int ldlm_reprocess_queue(struct ldlm_resource *res, struct list_head *queue, struct list_head *work_list, - enum ldlm_process_intention 
intention, - struct ldlm_lock *hint) + enum ldlm_process_intention intention, __u64 hint) { struct list_head *tmp, *pos; ldlm_processing_policy policy; @@ -2038,7 +2037,7 @@ int ldlm_handle_conflict_lock(struct ldlm_lock *lock, __u64 *flags, class_fail_export(lock->l_export); if (rc == -ERESTART) - ldlm_reprocess_all(res, NULL); + ldlm_reprocess_all(res, 0); lock_res(res); if (rc == -ERESTART) { @@ -2349,7 +2348,7 @@ out: */ static void __ldlm_reprocess_all(struct ldlm_resource *res, enum ldlm_process_intention intention, - struct ldlm_lock *hint) + __u64 hint) { LIST_HEAD(rpc_list); #ifdef HAVE_SERVER_SUPPORT @@ -2382,6 +2381,7 @@ restart: LDLM_WORK_CP_AST); if (rc == -ERESTART) { LASSERT(list_empty(&rpc_list)); + hint = 0; goto restart; } #else @@ -2396,7 +2396,7 @@ restart: EXIT; } -void ldlm_reprocess_all(struct ldlm_resource *res, struct ldlm_lock *hint) +void ldlm_reprocess_all(struct ldlm_resource *res, __u64 hint) { __ldlm_reprocess_all(res, LDLM_PROCESS_RESCAN, hint); } @@ -2408,7 +2408,7 @@ static int ldlm_reprocess_res(struct cfs_hash *hs, struct cfs_hash_bd *bd, struct ldlm_resource *res = cfs_hash_object(hs, hnode); /* This is only called once after recovery done. LU-8306. 
*/ - __ldlm_reprocess_all(res, LDLM_PROCESS_RECOVERY, NULL); + __ldlm_reprocess_all(res, LDLM_PROCESS_RECOVERY, 0); return 0; } @@ -2555,7 +2555,7 @@ static void ldlm_cancel_lock_for_export(struct obd_export *exp, ldlm_lvbo_update(res, lock, NULL, 1); ldlm_lock_cancel(lock); if (!exp->exp_obd->obd_stopping) - ldlm_reprocess_all(res, lock); + ldlm_reprocess_all(res, lock->l_policy_data.l_inodebits.bits); ldlm_resource_putref(res); ecl->ecl_loop++; @@ -2720,7 +2720,8 @@ void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode) ldlm_grant_lock(lock, NULL); unlock_res_and_lock(lock); - ldlm_reprocess_all(lock->l_resource, lock); + ldlm_reprocess_all(lock->l_resource, + lock->l_policy_data.l_inodebits.bits); EXIT; #endif diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index ebaa261..03ca051 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1200,7 +1200,7 @@ int ldlm_glimpse_locks(struct ldlm_resource *res, rc = ldlm_run_ast_work(ldlm_res_to_ns(res), gl_work_list, LDLM_WORK_GL_AST); if (rc == -ERESTART) - ldlm_reprocess_all(res, NULL); + ldlm_reprocess_all(res, 0); RETURN(rc); } @@ -1548,12 +1548,13 @@ retry: ldlm_lock_destroy_nolock(lock); unlock_res_and_lock(lock); } - ldlm_reprocess_all(lock->l_resource, lock); + ldlm_reprocess_all(lock->l_resource, lock->l_policy_data.l_inodebits.bits); } if (!err && !ldlm_is_cbpending(lock) && dlm_req->lock_desc.l_resource.lr_type != LDLM_FLOCK) - ldlm_reprocess_all(lock->l_resource, lock); + ldlm_reprocess_all(lock->l_resource, + lock->l_policy_data.l_inodebits.bits); LDLM_LOCK_RELEASE(lock); } @@ -1659,7 +1660,10 @@ int ldlm_handle_convert0(struct ptlrpc_request *req, ldlm_clear_blocking_data(lock); unlock_res_and_lock(lock); - ldlm_reprocess_all(lock->l_resource, NULL); + /* All old bits should be reprocessed to send new BL AST if + * it wasn't sent earlier due to LDLM_FL_AST_SENT bit set. 
+ * */ + ldlm_reprocess_all(lock->l_resource, bits); } dlm_rep->lock_handle = lock->l_remote_handle; @@ -1733,7 +1737,7 @@ int ldlm_request_cancel(struct ptlrpc_request *req, */ if (res != pres) { if (pres != NULL) { - ldlm_reprocess_all(pres, NULL); + ldlm_reprocess_all(pres, 0); LDLM_RESOURCE_DELREF(pres); ldlm_resource_putref(pres); } @@ -1764,7 +1768,7 @@ int ldlm_request_cancel(struct ptlrpc_request *req, LDLM_LOCK_PUT(lock); } if (pres != NULL) { - ldlm_reprocess_all(pres, NULL); + ldlm_reprocess_all(pres, 0); LDLM_RESOURCE_DELREF(pres); ldlm_resource_putref(pres); } @@ -2693,15 +2697,18 @@ static int ldlm_revoke_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd, void ldlm_revoke_export_locks(struct obd_export *exp) { + int rc; LIST_HEAD(rpc_list); - ENTRY; cfs_hash_for_each_nolock(exp->exp_lock_hash, ldlm_revoke_lock_cb, &rpc_list, 0); - ldlm_run_ast_work(exp->exp_obd->obd_namespace, &rpc_list, + rc = ldlm_run_ast_work(exp->exp_obd->obd_namespace, &rpc_list, LDLM_WORK_REVOKE_AST); + if (rc == -ERESTART) + ldlm_reprocess_recovery_done(exp->exp_obd->obd_namespace); + EXIT; } EXPORT_SYMBOL(ldlm_revoke_export_locks); diff --git a/lustre/ldlm/ldlm_reclaim.c b/lustre/ldlm/ldlm_reclaim.c index cf4c87f..d371dc2 100644 --- a/lustre/ldlm/ldlm_reclaim.c +++ b/lustre/ldlm/ldlm_reclaim.c @@ -181,6 +181,7 @@ static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count, { struct ldlm_reclaim_cb_data data; int idx, type, start; + int rc; ENTRY; LASSERT(*count != 0); @@ -216,7 +217,10 @@ static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count, LASSERTF(*count >= data.rcd_added, "count:%d, added:%d\n", *count, data.rcd_added); - ldlm_run_ast_work(ns, &data.rcd_rpc_list, LDLM_WORK_REVOKE_AST); + rc = ldlm_run_ast_work(ns, &data.rcd_rpc_list, LDLM_WORK_REVOKE_AST); + if (rc == -ERESTART) + ldlm_reprocess_recovery_done(ns); + *count -= data.rcd_added; EXIT; } diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index f3c75c7..bb1fa108 100644 
--- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -236,9 +236,8 @@ int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data) RETURN(ldlm_completion_tail(lock, data)); } - LDLM_DEBUG(lock, - "client-side enqueue returned a blocked lock, going forward"); - ldlm_reprocess_all(lock->l_resource, NULL); + LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, going forward"); + ldlm_reprocess_all(lock->l_resource, 0); RETURN(0); } EXPORT_SYMBOL(ldlm_completion_ast_async); @@ -1255,7 +1254,8 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock) } LDLM_DEBUG(lock, "server-side local cancel"); ldlm_lock_cancel(lock); - ldlm_reprocess_all(lock->l_resource, lock); + ldlm_reprocess_all(lock->l_resource, + lock->l_policy_data.l_inodebits.bits); } RETURN(rc); diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index 5bbcd29..bc5ad6a 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -2025,7 +2025,8 @@ out_unlock: } out_reprocess: - ldlm_reprocess_all(lease->l_resource, lease); + ldlm_reprocess_all(lease->l_resource, + lease->l_policy_data.l_inodebits.bits); LDLM_LOCK_PUT(lease); ma->ma_valid = 0; @@ -2235,7 +2236,8 @@ out_obj: /* the 2nd object has been used, and not swapped */ mdt_object_put(info->mti_env, o2); - ldlm_reprocess_all(lease->l_resource, lease); + ldlm_reprocess_all(lease->l_resource, + lease->l_policy_data.l_inodebits.bits); out_lease: LDLM_LOCK_PUT(lease); @@ -2356,7 +2358,8 @@ out_unlock: OBD_FREE_PTR_ARRAY(resync_ids, resync_count); out_reprocess: - ldlm_reprocess_all(lease->l_resource, lease); + ldlm_reprocess_all(lease->l_resource, + lease->l_policy_data.l_inodebits.bits); LDLM_LOCK_PUT(lease); ma->ma_valid = 0; diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 07552c5..45cdfb0 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -2179,7 +2179,8 @@ static int mdd_migrate_close(struct mdt_thread_info *info, * cancelled, it's okay to cancel it now as 
we've held mot_open_sem. */ ldlm_lock_cancel(lease); - ldlm_reprocess_all(lease->l_resource, lease); + ldlm_reprocess_all(lease->l_resource, + lease->l_policy_data.l_inodebits.bits); LDLM_LOCK_PUT(lease); close: