From: Liang Zhen Date: Mon, 17 Nov 2014 15:35:54 +0000 (+0800) Subject: LU-5604 tgt: return missed fail ids X-Git-Tag: 2.7.50~8 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=4de90170e2573321e7691364d1d527aedfd25ff9 LU-5604 tgt: return missed fail ids OBD_FAIL_LDLM_REPLY is missing from tgt_enqueue, and it is actually no longer suitable for tgt_enqueue because tgt_enqueue() is a common handler now. This patch includes a few changes: - tgt_enqueue sets tgt_session_info::tsi_reply_fail_id to OBD_FAIL_MGS/MDS/OST_LDLM_REPLY_NET based on the type of target. - rewrite test_52 of replay-single; the only reason that test_52 can pass is a typo: $CHECKSTAT -t file $DIR/$tfile-* which should be $DIR/$tfile - add definitions for OBD_FAIL_LDLM_SRV_CP/BL/GL_AST and resolve OBD_FAIL conflicts - OBD_FAIL_UPDATE_OBJ_NET_REP was renamed to OBD_FAIL_OUT_UPDATE_NET_REP but was still referenced by its old name in tests. - the OBD_FAIL_MDS_FAIL_LOV_LOG_ADD check is obsolete, as are its tests. Meanwhile the OSP code was updated to fix a panic in case of error. - OBD_FAIL_TGT_LAST_REPLAY is removed along with its test. It was never used and it seems it was even introduced by mistake. 
Test-Parameters: envdefinitions=SLOW=yes alwaysuploadlogs testlist=replay-dual,replay-single Signed-off-by: Liang Zhen Signed-off-by: Mikhail Pershin Change-Id: If5113e459f5628047e17114b6bc20ba910f3c142 Reviewed-on: http://review.whamcloud.com/12232 Reviewed-by: Alex Zhuravlev Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 6c43bb4..e896549 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -246,6 +246,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_MDS_RENAME2 0x154 #define OBD_FAIL_MDS_RENAME3 0x155 #define OBD_FAIL_MDS_RENAME4 0x156 +#define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157 /* layout lock */ #define OBD_FAIL_MDS_NO_LL_GETATTR 0x170 @@ -354,6 +355,10 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322 #define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323 +#define OBD_FAIL_LDLM_SRV_BL_AST 0x324 +#define OBD_FAIL_LDLM_SRV_CP_AST 0x325 +#define OBD_FAIL_LDLM_SRV_GL_AST 0x326 + /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 @@ -421,7 +426,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_TGT_REPLAY_DROP 0x707 #define OBD_FAIL_TGT_FAKE_EXP 0x708 #define OBD_FAIL_TGT_REPLAY_DELAY 0x709 -#define OBD_FAIL_TGT_LAST_REPLAY 0x710 +/* #define OBD_FAIL_TGT_LAST_REPLAY 0x710 (obsoleted) */ #define OBD_FAIL_TGT_CLIENT_ADD 0x711 #define OBD_FAIL_TGT_RCVG_FLAG 0x712 #define OBD_FAIL_TGT_DELAY_CONDITIONAL 0x713 @@ -448,6 +453,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_MGS_TARGET_REG_NET 0x90a #define OBD_FAIL_MGS_TARGET_DEL_NET 0x90b #define OBD_FAIL_MGS_CONFIG_READ_NET 0x90c +#define OBD_FAIL_MGS_LDLM_REPLY_NET 0x90d #define OBD_FAIL_QUOTA_DQACQ_NET 0xA01 #define OBD_FAIL_QUOTA_EDQUOT 0xA02 @@ -599,6 +605,11 @@ int 
obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_ONCE CFS_FAIL_ONCE #define OBD_FAILED CFS_FAILED +#define LUT_FAIL_CLASS(fail_id) (((fail_id) >> 8) << 16) +#define LUT_FAIL_MGT LUT_FAIL_CLASS(OBD_FAIL_MGS) +#define LUT_FAIL_MDT LUT_FAIL_CLASS(OBD_FAIL_MDS) +#define LUT_FAIL_OST LUT_FAIL_CLASS(OBD_FAIL_OST) + extern atomic_t libcfs_kmemory; #ifdef CONFIG_PROC_FS diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index a277287..7f70349 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -920,7 +920,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) LASSERT(lock != NULL); LASSERT(data != NULL); - if (OBD_FAIL_PRECHECK(OBD_FAIL_OST_LDLM_REPLY_NET)) { + if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_SRV_CP_AST)) { LDLM_DEBUG(lock, "dropping CP AST"); RETURN(0); } diff --git a/lustre/osp/osp_sync.c b/lustre/osp/osp_sync.c index 00c4955..d0b6d7c 100644 --- a/lustre/osp/osp_sync.c +++ b/lustre/osp/osp_sync.c @@ -344,25 +344,23 @@ static int osp_sync_add_rec(const struct lu_env *env, struct osp_device *d, ctxt = llog_get_context(d->opd_obd, LLOG_MDS_OST_ORIG_CTXT); if (ctxt == NULL) RETURN(-ENOMEM); + rc = llog_add(env, ctxt->loc_handle, &osi->osi_hdr, &osi->osi_cookie, th); llog_ctxt_put(ctxt); - CDEBUG(D_OTHER, "%s: new record "DOSTID":%lu/%lu: %d\n", - d->opd_obd->obd_name, POSTID(&osi->osi_cookie.lgc_lgl.lgl_oi), - (unsigned long) osi->osi_cookie.lgc_lgl.lgl_ogen, - (unsigned long) osi->osi_cookie.lgc_index, rc); - - if (rc > 0) - rc = 0; - - if (likely(rc == 0)) { + if (likely(rc >= 0)) { + CDEBUG(D_OTHER, "%s: new record "DOSTID":%lu/%lu: %d\n", + d->opd_obd->obd_name, + POSTID(&osi->osi_cookie.lgc_lgl.lgl_oi), + (unsigned long)osi->osi_cookie.lgc_lgl.lgl_ogen, + (unsigned long)osi->osi_cookie.lgc_index, rc); spin_lock(&d->opd_syn_lock); d->opd_syn_changes++; spin_unlock(&d->opd_syn_lock); } - - RETURN(rc); + /* return 0 always here, error case just cause no llog record */ + 
RETURN(0); } int osp_sync_add(const struct lu_env *env, struct osp_object *o, diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index dd93805..55b1869 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -1237,6 +1237,20 @@ int tgt_enqueue(struct tgt_session_info *tsi) if (rc) RETURN(err_serious(rc)); + switch (LUT_FAIL_CLASS(tsi->tsi_reply_fail_id)) { + case LUT_FAIL_MDT: + tsi->tsi_reply_fail_id = OBD_FAIL_MDS_LDLM_REPLY_NET; + break; + case LUT_FAIL_OST: + tsi->tsi_reply_fail_id = OBD_FAIL_OST_LDLM_REPLY_NET; + break; + case LUT_FAIL_MGT: + tsi->tsi_reply_fail_id = OBD_FAIL_MGS_LDLM_REPLY_NET; + break; + default: + tsi->tsi_reply_fail_id = OBD_FAIL_LDLM_REPLY; + break; + } RETURN(req->rq_status); } EXPORT_SYMBOL(tgt_enqueue); diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 5cab1a4..69c7b20 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -901,9 +901,15 @@ test_25() { drop_ldlm_cancel "multiop $DIR2/$tfile Ow512" & sleep 1 -#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 # failover, replay and resend replayed waiting locks - do_facet ost1 lctl set_param fail_loc=0x80000213 + if [ $(lustre_version_code ost1) -ge $(version_code 2.6.90) ]; then + #define OBD_FAIL_LDLM_SRV_CP_AST 0x325 + do_facet ost1 lctl set_param fail_loc=0x80000325 + else + #define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 + do_facet ost1 lctl set_param fail_loc=0x80000213 + fi + fail ost1 # multiop does not finish because CP AST is skipped; diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 2dfe615..25f49a7 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -1069,17 +1069,28 @@ run_test 50 "Double OSC recovery, don't LASSERT (3812)" # b3764 timed out lock replay test_52() { - touch $DIR/$tfile - cancel_lru_locks mdc + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.90) ] && + skip "MDS prior to 2.6.90 handle LDLM_REPLY_NET incorrectly" 
&& + return 0 - multiop $DIR/$tfile s || return 1 - replay_barrier $SINGLEMDS -#define OBD_FAIL_LDLM_REPLY 0x30c - do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000030c" - fail $SINGLEMDS || return 2 - do_facet $SINGLEMDS "lctl set_param fail_loc=0x0" + touch $DIR/$tfile + cancel_lru_locks mdc + + multiop_bg_pause $DIR/$tfile s_s || return 1 + mpid=$! + + #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157 + lctl set_param -n ldlm.cancel_unused_locks_before_replay "0" + do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000157" - $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true + fail $SINGLEMDS || return 2 + kill -USR1 $mpid + wait $mpid || return 3 + + do_facet $SINGLEMDS "lctl set_param fail_loc=0x0" + lctl set_param fail_loc=0x0 + lctl set_param -n ldlm.cancel_unused_locks_before_replay "1" + rm -f $DIR/$tfile } run_test 52 "time out lock replay (3764)" @@ -1988,22 +1999,6 @@ test_73b() { } run_test 73b "open(O_CREAT), unlink, replay, reconnect at open_replay reply, close" -test_73c() { - multiop_bg_pause $DIR/$tfile O_tSc || return 3 - pid=$! - rm -f $DIR/$tfile - - replay_barrier $SINGLEMDS -#define OBD_FAIL_TGT_LAST_REPLAY 0x710 - do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000710" - fail $SINGLEMDS - kill -USR1 $pid - wait $pid || return 1 - [ -e $DIR/$tfile ] && return 2 - return 0 -} -run_test 73c "open(O_CREAT), unlink, replay, reconnect at last_replay, close" - # bug 18554 test_74() { local clients=${CLIENTS:-$HOSTNAME} @@ -2043,7 +2038,7 @@ test_80a() { local remote_dir=$DIR/$tdir/remote_dir mkdir -p $DIR/$tdir - #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701 + #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701 do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701 $LFS mkdir -i $MDTIDX $remote_dir & local CLIENT_PID=$! 
@@ -2487,26 +2482,6 @@ test_81h() { } run_test 81h "DNE: unlink remote dir, drop request reply, fail 2 MDTs" -test_83a() { - mkdir -p $DIR/$tdir - createmany -o $DIR/$tdir/$tfile- 10 || return 1 -#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 - do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000140" - unlinkmany $DIR/$tdir/$tfile- 10 || return 2 -} -run_test 83a "fail log_add during unlink recovery" - -test_83b() { - mkdir -p $DIR/$tdir - createmany -o $DIR/$tdir/$tfile- 10 || return 1 - replay_barrier $SINGLEMDS - unlinkmany $DIR/$tdir/$tfile- 10 || return 2 -#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 - do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000140" - fail $SINGLEMDS -} -run_test 83b "fail log_add during unlink recovery" - test_84a() { #define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144 do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000144" @@ -2870,7 +2845,7 @@ test_100a() { #otherwise it may create single stripe dir here $LFS setdirstripe -i1 $DIR/$tdir/remote_dir - #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701 + #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701 do_facet mds$((MDTIDX+1)) lctl set_param fail_loc=0x1701 $LFS setdirstripe -i0 -c2 $striped_dir & local CLIENT_PID=$!