From 301d76a71176c186129231ddd1323bae21100165 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Wed, 21 Jul 2021 18:14:01 +0300 Subject: [PATCH] LU-14876 out: don't connect to busy MDS-MDS export MDS-MDS connection is missing check for busy requests upon reconnect, so resent can be executed concurrently with original request. - in ptlrpc_server_check_resend_in_progress() remove exception for bulk requests, they can be compared by XID nowadays. This prevents OUT requests vs resent execution as well. - fix messages in target_handle_connect() to report correct information about connection details - in out_handle() check for last_xid only once per OUT_UPDATE - test 110m is added to recovery-small to reproduce the issue Signed-off-by: Mikhail Pershin Change-Id: I2ad183674d59a2cdeab0037bd8551c607b10ffeb Reviewed-on: https://review.whamcloud.com/44390 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_lib.c | 24 ++++++++++----------- lustre/ptlrpc/service.c | 7 ------- lustre/target/out_handler.c | 47 ++++++++++++++++++++++++------------------ lustre/tests/recovery-small.sh | 32 ++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 39 deletions(-) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 4d37048..4f29756 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1203,13 +1203,12 @@ int target_handle_connect(struct ptlrpc_request *req) * processing, so we needs to allow lw_client to be connected at * anytime, instead of only the initial connection */ - lw_client = (data->ocd_connect_flags & OBD_CONNECT_LIGHTWEIGHT) != 0; + lw_client = OCD_HAS_FLAG(data, LIGHTWEIGHT); if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) { initial_conn = true; - mds_conn = (data->ocd_connect_flags & OBD_CONNECT_MDS) != 0; - mds_mds_conn = (data->ocd_connect_flags & - OBD_CONNECT_MDS_MDS) != 0; + mds_conn = OCD_HAS_FLAG(data, MDS); + mds_mds_conn = OCD_HAS_FLAG(data, MDS_MDS); /* * OBD_CONNECT_MNE_SWAB is removed at 2.14 @@ -1267,26 +1266,27 @@ int target_handle_connect(struct ptlrpc_request *req) export = NULL; rc = -EALREADY; } else if ((mds_conn || (lw_client && initial_conn) || - data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) && - export->exp_connection != NULL) { + OCD_HAS_FLAG(data, MDS_MDS)) && export->exp_connection) { spin_unlock(&export->exp_lock); if (req->rq_peer.nid != export->exp_connection->c_peer.nid) { /* MDS or LWP reconnected after failover. */ LCONSOLE_WARN("%s: Received %s connection from %s, removing former export from %s\n", target->obd_name, - mds_conn ? "MDS" : "LWP", + lw_client ? "LWP" : "MDS", libcfs_nid2str(req->rq_peer.nid), libcfs_nid2str(export->exp_connection->c_peer.nid)); } else { - /* New MDS connection from the same NID. */ - LCONSOLE_WARN("%s: Received new %s connection from %s, removing former export from same NID\n", + /* New connection from the same NID. */ + LCONSOLE_WARN("%s: Received new %s connection from %s, %s former export from same NID\n", target->obd_name, - mds_conn ? "MDS" : "LWP", - libcfs_nid2str(req->rq_peer.nid)); + lw_client ? "LWP" : "MDS", + libcfs_nid2str(req->rq_peer.nid), + OCD_HAS_FLAG(data, MDS_MDS) ? + "keep" : "remove"); } if (req->rq_peer.nid == export->exp_connection->c_peer.nid && - data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) { + OCD_HAS_FLAG(data, MDS_MDS)) { /* * Because exports between MDTs will always be * kept, let's do not fail such export if they diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 0ae0108..6a68a19 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1644,13 +1644,6 @@ ptlrpc_server_check_resend_in_progress(struct ptlrpc_request *req) return NULL; /* - * bulk request are aborted upon reconnect, don't try to - * find a match - */ - if (req->rq_bulk_write || req->rq_bulk_read) - return NULL; - - /* * This list should not be longer than max_requests in * flights on the client, so it is not all that long. * Also we only hit this codepath in case of a resent diff --git a/lustre/target/out_handler.c b/lustre/target/out_handler.c index 45de5bc..57c0d91 100644 --- a/lustre/target/out_handler.c +++ b/lustre/target/out_handler.c @@ -52,7 +52,7 @@ static void out_reconstruct(const struct lu_env *env, struct dt_device *dt, struct object_update_reply *reply, int index) { - CDEBUG(D_INFO, "%s: fork reply reply %p index %d: rc = %d\n", + CDEBUG(D_HA, "%s: fork reply reply %p index %d: rc = %d\n", dt_obd_name(dt), reply, index, 0); object_update_result_insert(reply, NULL, 0, index, 0); @@ -64,16 +64,10 @@ typedef void (*out_reconstruct_t)(const struct lu_env *env, struct object_update_reply *reply, int index); -static inline int out_check_resent(const struct lu_env *env, - struct dt_device *dt, - struct dt_object *obj, - struct ptlrpc_request *req, - out_reconstruct_t reconstruct, - struct object_update_reply *reply, - int index) +static inline bool out_check_resent(struct ptlrpc_request *req) { if (likely(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))) - return 0; + return false; if (req_xid_is_last(req)) { struct lsd_client_data *lcd; @@ -89,14 +83,12 @@ static inline int out_check_resent(const struct lu_env *env, lustre_msg_set_transno(req->rq_repmsg, req->rq_transno); lustre_msg_set_status(req->rq_repmsg, req->rq_status); - DEBUG_REQ(D_RPCTRACE, req, "restoring resent RPC"); - - reconstruct(env, dt, obj, reply, index); - return 1; + DEBUG_REQ(D_HA, req, "reconstruct resent RPC"); + return true; } - DEBUG_REQ(D_HA, req, "no reply for RESENT req (have %lld)", - req->rq_export->exp_target_data.ted_lcd->lcd_last_xid); - return 0; + DEBUG_REQ(D_HA, req, "reprocess RESENT req, last_xid is %lld", + req->rq_export->exp_target_data.ted_lcd->lcd_last_xid); + return false; } static int out_create(struct tgt_session_info *tsi) @@ -973,6 +965,8 @@ int out_handle(struct tgt_session_info *tsi) int rc1 = 0; int ouh_size, reply_size; int updates; + bool need_reconstruct; + ENTRY; req_capsule_set(pill, &RQF_OUT_UPDATE); @@ -1115,6 +1109,8 @@ int out_handle(struct tgt_session_info *tsi) tti->tti_u.update.tti_update_reply = reply; tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi)); + need_reconstruct = out_check_resent(pill->rc_req); + /* Walk through updates in the request to execute them */ for (i = 0; i < update_buf_count; i++) { struct tgt_handler *h; @@ -1162,12 +1158,19 @@ int out_handle(struct tgt_session_info *tsi) /* Check resend case only for modifying RPC */ if (h->th_flags & IS_MUTABLE) { - struct ptlrpc_request *req = tgt_ses_req(tsi); + /* sanity check for last XID changing */ + if (unlikely(!need_reconstruct && + req_xid_is_last(pill->rc_req))) { + DEBUG_REQ(D_ERROR, pill->rc_req, + "unexpected last XID change"); + GOTO(next, rc = -EINVAL); + } - if (out_check_resent(env, dt, dt_obj, req, - out_reconstruct, reply, - reply_index)) + if (need_reconstruct) { + out_reconstruct(env, dt, dt_obj, reply, + reply_index); GOTO(next, rc = 0); + } if (dt->dd_rdonly) GOTO(next, rc = -EROFS); @@ -1176,6 +1179,10 @@ int out_handle(struct tgt_session_info *tsi) /* start transaction for modification RPC only */ if (h->th_flags & IS_MUTABLE && current_batchid == -1) { current_batchid = update->ou_batchid; + + if (reply_index == 0) + CFS_RACE(OBD_FAIL_PTLRPC_RESEND_RACE); + rc = out_tx_start(env, dt, ta, tsi->tsi_exp); if (rc != 0) GOTO(next, rc); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 1c1b86d..cc54479 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2272,6 +2272,38 @@ test_110k() { } run_test 110k "FID_QUERY failed during recovery" +test_110m () { + (( $(lustre_version_code $SINGLEMDS) >= $(version_code 2.14.52) )) || + skip "Need MDS version at least 2.14.52" + (( $MDSCOUNT >= 2 )) || skip "needs at least 2 MDTs" + local remote_dir=$DIR/$tdir/remote_dir + local mdccli + local uuid + local diridx + + mkdir_on_mdt0 $DIR/$tdir + +#define OBD_FAIL_PTLRPC_RESEND_RACE 0x0525 + do_facet mds1 $LCTL set_param fail_loc=0x80000525 + $LFS mkdir -i 1 -c2 $remote_dir & + mkdir_pid=$! + sleep 3 + # initiate the re-connect & re-send + mdccli=$(do_facet mds2 $LCTL dl | + awk '/MDT0000-osp-MDT0001/ {print $4;}') + uuid=$(do_facet mds2 $LCTL get_param -n osp.$mdccli.mdt_conn_uuid) + echo "conn_uuid=$uuid" + do_facet mds2 $LCTL set_param "osp.$mdccli.import=connection=$uuid" + + wait $mkdir_pid + (( $? == 0 )) || error "mkdir failed" + + diridx=$($LFS getstripe -m $remote_dir) + (( $diridx == 1 )) || error "$diridx != 1" + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 110m "update resent vs original RPC race" + # LU-2844 mdt prepare fail should not cause umount oops test_111 () { -- 1.8.3.1