From 68fb53ad4bb2dbc2104d19613546ecf2534d97d5 Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Fri, 4 Dec 2020 19:35:19 +0300 Subject: [PATCH 1/1] LU-14182 lov: cancel layout lock on replay deadlock Layout locks are not replayed and are instead cancelled as unused, which requires taking lov_conf_lock. The semaphore may already be held by cl_lock_flush(), which prepares a new IO that cannot be sent to the MDS while it is in recovery. HPE-bug-id: LUS-9232 Signed-off-by: Vitaly Fertman Change-Id: I1a1a91a81c19ad4deca9ff581107512642f0b666 Reviewed-by: Alexey Lyashkov Reviewed-by: Andriy Skulysh Tested-by: Jenkins Build User Reviewed-on: https://review.whamcloud.com/40867 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexey Lyashkov Reviewed-by: Andriy Skulysh Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/ldlm/ldlm_request.c | 2 ++ lustre/llite/namei.c | 2 ++ lustre/lov/lov_cl_internal.h | 10 +++++++--- lustre/lov/lov_object.c | 44 ++++++++++++++++++++++++++------------------ lustre/tests/replay-dual.sh | 20 ++++++++++++++++++++ 6 files changed, 58 insertions(+), 21 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index f820c87..f681534 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -395,6 +395,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LDLM_PROLONG_PAUSE 0x32b #define OBD_FAIL_LDLM_LOCAL_CANCEL_PAUSE 0x32c #define OBD_FAIL_LDLM_LOCK_REPLAY 0x32d +#define OBD_FAIL_LDLM_REPLAY_PAUSE 0x32e /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index d94cb1a..55994cd 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -2531,6 +2531,8 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns) "Dropping as many unused locks as possible before replay for namespace %s (%d)\n", ldlm_ns_name(ns), ns->ns_nr_unused); + 
OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_REPLAY_PAUSE, cfs_fail_val); + /* * We don't need to care whether or not LRU resize is enabled * because the LDLM_LRU_FLAG_NO_WAIT policy doesn't use the diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 59b6d8e..6814416 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -212,6 +212,8 @@ static int ll_dom_lock_cancel(struct inode *inode, struct ldlm_lock *lock) if (IS_ERR(env)) RETURN(PTR_ERR(env)); + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_REPLAY_PAUSE, cfs_fail_val); + /* reach MDC layer to flush data under the DoM ldlm lock */ rc = cl_object_flush(env, lli->lli_clob, lock); if (rc == -ENODATA) { diff --git a/lustre/lov/lov_cl_internal.h b/lustre/lov/lov_cl_internal.h index 45914f1..c5b3c3e 100644 --- a/lustre/lov/lov_cl_internal.h +++ b/lustre/lov/lov_cl_internal.h @@ -249,6 +249,11 @@ struct lov_mirror_entry { unsigned short lre_end; /* end index of this mirror */ }; +enum lov_object_flags { + /* Layout is invalid, set when layout lock is lost */ + LO_LAYOUT_INVALID = 0x1, +}; + /** * lov-specific file state. * @@ -279,10 +284,9 @@ struct lov_object { */ enum lov_layout_type lo_type; /** - * True if layout is invalid. This bit is cleared when layout lock - * is lost. + * Object flags. */ - bool lo_layout_invalid; + unsigned long lo_obj_flags; /** * How many IOs are on going on this object. Layout can be changed * only if there is no active IO. 
diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index fac827e..d3186cd 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -178,7 +178,7 @@ static int lov_init_sub(const struct lu_env *env, struct lov_object *lov, old_obj = lu_object_locate(&parent->coh_lu, &lov_device_type); LASSERT(old_obj != NULL); old_lov = cl2lov(lu2cl(old_obj)); - if (old_lov->lo_layout_invalid) { + if (test_bit(LO_LAYOUT_INVALID, &old_lov->lo_obj_flags)) { /* the object's layout has already changed but isn't * refreshed */ lu_object_unhash(env, &subobj->co_lu); @@ -633,7 +633,7 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, LASSERT(lsm->lsm_entry_count > 0); LASSERT(lov->lo_lsm == NULL); lov->lo_lsm = lsm_addref(lsm); - lov->lo_layout_invalid = true; + set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); dump_lsm(D_INODE, lsm); @@ -919,7 +919,8 @@ static void lov_fini_released(const struct lu_env *env, struct lov_object *lov, static int lov_print_empty(const struct lu_env *env, void *cookie, lu_printer_t p, const struct lu_object *o) { - (*p)(env, cookie, "empty %d\n", lu2lov(o)->lo_layout_invalid); + (*p)(env, cookie, "empty %d\n", + test_bit(LO_LAYOUT_INVALID, &lu2lov(o)->lo_obj_flags)); return 0; } @@ -932,8 +933,8 @@ static int lov_print_composite(const struct lu_env *env, void *cookie, (*p)(env, cookie, "entries: %d, %s, lsm{%p 0x%08X %d %u}:\n", lsm->lsm_entry_count, - lov->lo_layout_invalid ? "invalid" : "valid", lsm, - lsm->lsm_magic, atomic_read(&lsm->lsm_refc), + test_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags) ? "invalid" : + "valid", lsm, lsm->lsm_magic, atomic_read(&lsm->lsm_refc), lsm->lsm_layout_gen); for (i = 0; i < lsm->lsm_entry_count; i++) { @@ -962,8 +963,8 @@ static int lov_print_released(const struct lu_env *env, void *cookie, (*p)(env, cookie, "released: %s, lsm{%p 0x%08X %d %u}:\n", - lov->lo_layout_invalid ? 
"invalid" : "valid", lsm, - lsm->lsm_magic, atomic_read(&lsm->lsm_refc), + test_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags) ? "invalid" : + "valid", lsm, lsm->lsm_magic, atomic_read(&lsm->lsm_refc), lsm->lsm_layout_gen); return 0; } @@ -976,7 +977,8 @@ static int lov_print_foreign(const struct lu_env *env, void *cookie, (*p)(env, cookie, "foreign: %s, lsm{%p 0x%08X %d %u}:\n", - lov->lo_layout_invalid ? "invalid" : "valid", lsm, + test_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags) ? + "invalid" : "valid", lsm, lsm->lsm_magic, atomic_read(&lsm->lsm_refc), lsm->lsm_layout_gen); (*p)(env, cookie, @@ -1372,14 +1374,14 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, dump_lsm(D_INODE, lsm); } - lov_conf_lock(lov); if (conf->coc_opc == OBJECT_CONF_INVALIDATE) { - lov->lo_layout_invalid = true; - GOTO(out, result = 0); + set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); + GOTO(out_lsm, result = 0); } + lov_conf_lock(lov); if (conf->coc_opc == OBJECT_CONF_WAIT) { - if (lov->lo_layout_invalid && + if (test_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags) && atomic_read(&lov->lo_active_ios) > 0) { lov_conf_unlock(lov); result = lov_layout_wait(env, lov); @@ -1397,25 +1399,30 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, (lov->lo_lsm->lsm_entries[0]->lsme_pattern == lsm->lsm_entries[0]->lsme_pattern))) { /* same version of layout */ - lov->lo_layout_invalid = false; + clear_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); GOTO(out, result = 0); } /* will change layout - check if there still exists active IO. 
*/ if (atomic_read(&lov->lo_active_ios) > 0) { - lov->lo_layout_invalid = true; + set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); GOTO(out, result = -EBUSY); } result = lov_layout_change(env, lov, lsm, conf); - lov->lo_layout_invalid = result != 0; + if (result) + set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); + else + clear_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); EXIT; out: lov_conf_unlock(lov); +out_lsm: lov_lsm_put(lsm); - CDEBUG(D_INODE, DFID" lo_layout_invalid=%d\n", - PFID(lu_object_fid(lov2lu(lov))), lov->lo_layout_invalid); + CDEBUG(D_INODE, DFID" lo_layout_invalid=%u\n", + PFID(lu_object_fid(lov2lu(lov))), + test_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags)); RETURN(result); } @@ -2250,7 +2257,8 @@ static struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov) lsm = lsm_addref(lov->lo_lsm); CDEBUG(D_INODE, "lsm %p addref %d/%d by %p.\n", lsm, atomic_read(&lsm->lsm_refc), - lov->lo_layout_invalid, current); + test_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags), + current); } lov_conf_thaw(lov); return lsm; diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index fea7dbb..b92df6b 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -1060,6 +1060,26 @@ test_29() { } run_test 29 "replay vs update with the same xid" +test_30() { + $LFS setstripe -E 1m -L mdt -E -1 $DIR/$tfile + #first write to have no problems with grants + dd if=/dev/zero of=$DIR/$tfile bs=4k count=10 || + error "dd on client failed" + dd if=/dev/zero of=$DIR/$tfile bs=4k count=10 seek=10 || + error "dd on client failed" + + #define OBD_FAIL_LDLM_REPLAY_PAUSE 0x32e + lctl set_param fail_loc=0x32e fail_val=4 + dd of=/dev/null if=$DIR2/$tfile & + local pid=$! + sleep 1 + + fail $SINGLEMDS + + wait $pid || error "dd on client failed" +} +run_test 30 "layout lock replay is not blocked on IO" + complete $SECONDS SLEEP=$((SECONDS - $NOW)) [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP -- 1.8.3.1