From: Vitaly Fertman Date: Tue, 7 Aug 2018 14:59:13 +0000 (+0300) Subject: LU-11251 mdt: ASSERTION (req_transno < next_transno) failed X-Git-Tag: 2.12.54~104 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=53764826b95f1264f17cdb186f8d8a3120d90806 LU-11251 mdt: ASSERTION (req_transno < next_transno) failed An update request is checked for duplicates by xid in is_req_replayed_by_update(). However xid is unique per client only. It may happen that there are 2 requests with the same xid from different clients. Perform lookup by transno, it is unique per MDT. Change-Id: If00b69f01451c659292c004aa296a6ea36680d3c Cray-bug-id: LUS-6015 Signed-off-by: Andriy Skulysh Reviewed-by: Vitaly Fertman Reviewed-by: Alexander Boyko Tested-by: Elena Gryaznova Reviewed-on: https://review.whamcloud.com/33001 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alexandr Boyko Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index f2a07ef..4a741d3 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -442,6 +442,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520 #define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521 +#define OBD_FAIL_PTLRPC_ROUND_XID 0x530 #define OBD_FAIL_PTLRPC_CONNECT_RACE 0x531 #define OBD_FAIL_OBD_PING_NET 0x600 diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 5a587c7..5d51d91 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -2381,7 +2381,7 @@ static void replay_request_or_update(struct lu_env *env, struct distribute_txn_replay_req *dtrq; dtrq = distribute_txn_lookup_finish_list(tdtd, - req->rq_xid); + transno); LASSERT(dtrq != NULL); spin_lock(&tdtd->tdtd_replay_list_lock); list_del_init(&dtrq->dtrq_list); @@ -2394,7 +2394,8 @@ static void replay_request_or_update(struct lu_env *env, } LASSERT(trd->trd_processing_task == current_pid()); - DEBUG_REQ(D_HA, req, "processing t%lld from %s", + DEBUG_REQ(D_HA, req, "processing x%llu t%lld from %s", + req->rq_xid, lustre_msg_get_transno(req->rq_reqmsg), libcfs_nid2str(req->rq_peer.nid)); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 8ba71f6..cb21f3e 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -689,6 +689,9 @@ static inline void ptlrpc_assign_next_xid(struct ptlrpc_request *req) spin_unlock(&req->rq_import->imp_lock); } +static __u64 ptlrpc_last_xid; +static spinlock_t ptlrpc_last_xid_lock; + int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, __u32 version, int opcode, char **bufs, struct ptlrpc_cli_ctx *ctx) @@ -740,7 +743,6 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, ptlrpc_at_set_req_timeout(request); lustre_msg_set_opc(request->rq_reqmsg, opcode); - ptlrpc_assign_next_xid(request); /* Let's setup deadline for req/reply/bulk unlink for opcode. */ if (cfs_fail_val == opcode) { @@ -755,6 +757,11 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) { fail_t = &request->rq_reply_deadline; fail2_t = &request->rq_bulk_deadline; + } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_ROUND_XID)) { + time64_t now = ktime_get_real_seconds(); + spin_lock(&ptlrpc_last_xid_lock); + ptlrpc_last_xid = ((__u64)now >> 4) << 24; + spin_unlock(&ptlrpc_last_xid_lock); } if (fail_t) { @@ -771,6 +778,7 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, msleep(4 * MSEC_PER_SEC); } } + ptlrpc_assign_next_xid(request); RETURN(0); @@ -3219,9 +3227,6 @@ void ptlrpc_abort_set(struct ptlrpc_request_set *set) } } -static __u64 ptlrpc_last_xid; -static spinlock_t ptlrpc_last_xid_lock; - /** * Initialize the XID for the node. This is common among all requests on * this node, and only requires the property that it is monotonically diff --git a/lustre/target/out_handler.c b/lustre/target/out_handler.c index a3b7c36..910f9b0 100644 --- a/lustre/target/out_handler.c +++ b/lustre/target/out_handler.c @@ -568,6 +568,11 @@ static int out_index_insert(struct tgt_session_info *tsi) tti->tti_tea.ta_handle, tti->tti_u.update.tti_update_reply, tti->tti_u.update.tti_update_reply_index); + + CDEBUG(D_INFO, "%s: "DFID" index insert %s: rc = %d\n", + tgt_name(tsi->tsi_tgt), PFID(lu_object_fid(&obj->do_lu)), + name, rc); + RETURN(rc); } diff --git a/lustre/target/update_recovery.c b/lustre/target/update_recovery.c index ac47105..b483a26 100644 --- a/lustre/target/update_recovery.c +++ b/lustre/target/update_recovery.c @@ -502,6 +502,8 @@ void dtrq_destroy(struct distribute_txn_replay_req *dtrq) struct distribute_txn_replay_req_sub *tmp; LASSERT(list_empty(&dtrq->dtrq_list)); + CDEBUG(D_HA, "destroy x%llu t%llu\n", dtrq->dtrq_xid, + dtrq->dtrq_master_transno); spin_lock(&dtrq->dtrq_sub_list_lock); list_for_each_entry_safe(dtrqs, tmp, &dtrq->dtrq_sub_list, dtrqs_list) { struct sub_thandle_cookie *stc; @@ -607,14 +609,14 @@ EXPORT_SYMBOL(distribute_txn_get_next_transno); struct distribute_txn_replay_req * distribute_txn_lookup_finish_list(struct target_distribute_txn_data *tdtd, - __u64 xid) + __u64 transno) { struct distribute_txn_replay_req *dtrq = NULL; struct distribute_txn_replay_req *iter; spin_lock(&tdtd->tdtd_replay_list_lock); list_for_each_entry(iter, &tdtd->tdtd_replay_finish_list, dtrq_list) { - if (iter->dtrq_xid == xid) { + if (iter->dtrq_master_transno == transno) { dtrq = iter; break; } @@ -631,7 +633,8 @@ bool is_req_replayed_by_update(struct ptlrpc_request *req) if (tgt->lut_tdtd == NULL) return false; - dtrq = distribute_txn_lookup_finish_list(tgt->lut_tdtd, req->rq_xid); + dtrq = distribute_txn_lookup_finish_list(tgt->lut_tdtd, + lustre_msg_get_transno(req->rq_reqmsg)); if (dtrq == NULL) return false; @@ -1093,6 +1096,7 @@ static void update_recovery_update_ses(struct lu_env *env, lrd->lrd_result = le32_to_cpu(lrd->lrd_result); lrd->lrd_client_gen = le32_to_cpu(lrd->lrd_client_gen); + CDEBUG(D_HA, "xid=%llu transno=%llu\n", lrd->lrd_xid, lrd->lrd_transno); if (lrd->lrd_transno != tgt_th_info(env)->tti_transno) return; diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 47479fb..f9af360 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -1028,6 +1028,38 @@ test_28() { } run_test 28 "lock replay should be ordered: waiting after granted" +test_29() { + local dir0=$DIR/$tdir/d0 + local dir1=$DIR/$tdir/d1 + + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + [ $CLIENTCOUNT -lt 2 ] && skip "needs >= 2 clients" && return 0 + [ "$CLIENT1" == "$CLIENT2" ] && + skip "clients must be on different nodes" && return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i0 $dir0 + $LFS mkdir -i1 $dir1 + sync + + replay_barrier mds2 + # create a remote dir, drop reply + #define OBD_FAIL_PTLRPC_ROUND_XID 0x530 + $LCTL set_param fail_loc=0x530 fail_val=36 + #define OBD_FAIL_MDS_REINT_MULTI_NET_REP 0x15a + do_facet mds2 $LCTL set_param fail_loc=0x8000015a + echo make remote dir d0 for $dir0 + $LFS mkdir -i1 -c1 $dir0/d3 & + sleep 1 + + echo make local dir d1 for $dir1 + do_node $CLIENT2 $LCTL set_param fail_loc=0x530 fail_val=36 + do_node $CLIENT2 mkdir $dir1/d4 + + fail mds2 +} +run_test 29 "replay vs update with the same xid" + complete $SECONDS SLEEP=$((SECONDS - $NOW)) [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP