Whamcloud - gitweb
LU-14876 out: don't connect to busy MDS-MDS export 90/44390/5
authorMikhail Pershin <mpershin@whamcloud.com>
Wed, 21 Jul 2021 15:14:01 +0000 (18:14 +0300)
committerOleg Drokin <green@whamcloud.com>
Wed, 18 Aug 2021 01:57:42 +0000 (01:57 +0000)
MDS-MDS connection is missing check for busy requests upon
reconnect, so resent can be executed concurrently with
original request.

- in ptlrpc_server_check_resend_in_progress() remove exception
  for bulk requests, they can be compared by XID nowadays.
  This prevents OUT requests vs resent execution as well.
- fix messages in target_handle_connect() to report correct
  information about connection details
- in out_handle() check for last_xid only once per OUT_UPDATE
- test 110m is added to recovery-small to reproduce the issue

Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: I2ad183674d59a2cdeab0037bd8551c607b10ffeb
Reviewed-on: https://review.whamcloud.com/44390
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lib.c
lustre/ptlrpc/service.c
lustre/target/out_handler.c
lustre/tests/recovery-small.sh

index 4d37048..4f29756 100644 (file)
@@ -1203,13 +1203,12 @@ int target_handle_connect(struct ptlrpc_request *req)
         * processing, so we needs to allow lw_client to be connected at
         * anytime, instead of only the initial connection
         */
-       lw_client = (data->ocd_connect_flags & OBD_CONNECT_LIGHTWEIGHT) != 0;
+       lw_client = OCD_HAS_FLAG(data, LIGHTWEIGHT);
 
        if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) {
                initial_conn = true;
-               mds_conn = (data->ocd_connect_flags & OBD_CONNECT_MDS) != 0;
-               mds_mds_conn = (data->ocd_connect_flags &
-                               OBD_CONNECT_MDS_MDS) != 0;
+               mds_conn = OCD_HAS_FLAG(data, MDS);
+               mds_mds_conn = OCD_HAS_FLAG(data, MDS_MDS);
 
                /*
                 * OBD_CONNECT_MNE_SWAB is removed at 2.14
@@ -1267,26 +1266,27 @@ int target_handle_connect(struct ptlrpc_request *req)
                export = NULL;
                rc = -EALREADY;
        } else if ((mds_conn || (lw_client && initial_conn) ||
-                  data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) &&
-                  export->exp_connection != NULL) {
+                  OCD_HAS_FLAG(data, MDS_MDS)) && export->exp_connection) {
                spin_unlock(&export->exp_lock);
                if (req->rq_peer.nid != export->exp_connection->c_peer.nid) {
                        /* MDS or LWP reconnected after failover. */
                        LCONSOLE_WARN("%s: Received %s connection from %s, removing former export from %s\n",
                                      target->obd_name,
-                                     mds_conn ? "MDS" : "LWP",
+                                     lw_client ? "LWP" : "MDS",
                                      libcfs_nid2str(req->rq_peer.nid),
                                      libcfs_nid2str(export->exp_connection->c_peer.nid));
                } else {
-                       /* New MDS connection from the same NID. */
-                       LCONSOLE_WARN("%s: Received new %s connection from %s, removing former export from same NID\n",
+                       /* New connection from the same NID. */
+                       LCONSOLE_WARN("%s: Received new %s connection from %s, %s former export from same NID\n",
                                      target->obd_name,
-                                     mds_conn ? "MDS" : "LWP",
-                                     libcfs_nid2str(req->rq_peer.nid));
+                                     lw_client ? "LWP" : "MDS",
+                                     libcfs_nid2str(req->rq_peer.nid),
+                                     OCD_HAS_FLAG(data, MDS_MDS) ?
+                                     "keep" : "remove");
                }
 
                if (req->rq_peer.nid == export->exp_connection->c_peer.nid &&
-                   data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) {
+                   OCD_HAS_FLAG(data, MDS_MDS)) {
                        /*
                         * Because exports between MDTs will always be
                         * kept, let's do not fail such export if they
index 0ae0108..6a68a19 100644 (file)
@@ -1644,13 +1644,6 @@ ptlrpc_server_check_resend_in_progress(struct ptlrpc_request *req)
                return NULL;
 
        /*
-        * bulk request are aborted upon reconnect, don't try to
-        * find a match
-        */
-       if (req->rq_bulk_write || req->rq_bulk_read)
-               return NULL;
-
-       /*
         * This list should not be longer than max_requests in
         * flights on the client, so it is not all that long.
         * Also we only hit this codepath in case of a resent
index 45de5bc..57c0d91 100644 (file)
@@ -52,7 +52,7 @@ static void out_reconstruct(const struct lu_env *env, struct dt_device *dt,
                            struct object_update_reply *reply,
                            int index)
 {
-       CDEBUG(D_INFO, "%s: fork reply reply %p index %d: rc = %d\n",
+       CDEBUG(D_HA, "%s: fork reply reply %p index %d: rc = %d\n",
               dt_obd_name(dt), reply, index, 0);
 
        object_update_result_insert(reply, NULL, 0, index, 0);
@@ -64,16 +64,10 @@ typedef void (*out_reconstruct_t)(const struct lu_env *env,
                                  struct object_update_reply *reply,
                                  int index);
 
-static inline int out_check_resent(const struct lu_env *env,
-                                  struct dt_device *dt,
-                                  struct dt_object *obj,
-                                  struct ptlrpc_request *req,
-                                  out_reconstruct_t reconstruct,
-                                  struct object_update_reply *reply,
-                                  int index)
+static inline bool out_check_resent(struct ptlrpc_request *req)
 {
        if (likely(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)))
-               return 0;
+               return false;
 
        if (req_xid_is_last(req)) {
                struct lsd_client_data *lcd;
@@ -89,14 +83,12 @@ static inline int out_check_resent(const struct lu_env *env,
                lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
                lustre_msg_set_status(req->rq_repmsg, req->rq_status);
 
-               DEBUG_REQ(D_RPCTRACE, req, "restoring resent RPC");
-
-               reconstruct(env, dt, obj, reply, index);
-               return 1;
+               DEBUG_REQ(D_HA, req, "reconstruct resent RPC");
+               return true;
        }
-       DEBUG_REQ(D_HA, req, "no reply for RESENT req (have %lld)",
-                req->rq_export->exp_target_data.ted_lcd->lcd_last_xid);
-       return 0;
+       DEBUG_REQ(D_HA, req, "reprocess RESENT req, last_xid is %lld",
+                 req->rq_export->exp_target_data.ted_lcd->lcd_last_xid);
+       return false;
 }
 
 static int out_create(struct tgt_session_info *tsi)
@@ -973,6 +965,8 @@ int out_handle(struct tgt_session_info *tsi)
        int                             rc1 = 0;
        int                             ouh_size, reply_size;
        int                             updates;
+       bool need_reconstruct;
+
        ENTRY;
 
        req_capsule_set(pill, &RQF_OUT_UPDATE);
@@ -1115,6 +1109,8 @@ int out_handle(struct tgt_session_info *tsi)
        tti->tti_u.update.tti_update_reply = reply;
        tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));
 
+       need_reconstruct = out_check_resent(pill->rc_req);
+
        /* Walk through updates in the request to execute them */
        for (i = 0; i < update_buf_count; i++) {
                struct tgt_handler      *h;
@@ -1162,12 +1158,19 @@ int out_handle(struct tgt_session_info *tsi)
 
                        /* Check resend case only for modifying RPC */
                        if (h->th_flags & IS_MUTABLE) {
-                               struct ptlrpc_request *req = tgt_ses_req(tsi);
+                               /* sanity check for last XID changing */
+                               if (unlikely(!need_reconstruct &&
+                                            req_xid_is_last(pill->rc_req))) {
+                                       DEBUG_REQ(D_ERROR, pill->rc_req,
+                                                 "unexpected last XID change");
+                                       GOTO(next, rc = -EINVAL);
+                               }
 
-                               if (out_check_resent(env, dt, dt_obj, req,
-                                                    out_reconstruct, reply,
-                                                    reply_index))
+                               if (need_reconstruct) {
+                                       out_reconstruct(env, dt, dt_obj, reply,
+                                                       reply_index);
                                        GOTO(next, rc = 0);
+                               }
 
                                if (dt->dd_rdonly)
                                        GOTO(next, rc = -EROFS);
@@ -1176,6 +1179,10 @@ int out_handle(struct tgt_session_info *tsi)
                        /* start transaction for modification RPC only */
                        if (h->th_flags & IS_MUTABLE && current_batchid == -1) {
                                current_batchid = update->ou_batchid;
+
+                               if (reply_index == 0)
+                                       CFS_RACE(OBD_FAIL_PTLRPC_RESEND_RACE);
+
                                rc = out_tx_start(env, dt, ta, tsi->tsi_exp);
                                if (rc != 0)
                                        GOTO(next, rc);
index 1c1b86d..cc54479 100755 (executable)
@@ -2272,6 +2272,38 @@ test_110k() {
 }
 run_test 110k "FID_QUERY failed during recovery"
 
+test_110m () {
+       (( $(lustre_version_code $SINGLEMDS) >= $(version_code 2.14.52) )) ||
+               skip "Need MDS version at least 2.14.52"
+       (( $MDSCOUNT >= 2 )) || skip "needs at least 2 MDTs"
+       local remote_dir=$DIR/$tdir/remote_dir
+       local mdccli
+       local uuid
+       local diridx
+
+       mkdir_on_mdt0 $DIR/$tdir
+
+#define OBD_FAIL_PTLRPC_RESEND_RACE 0x0525
+       do_facet mds1 $LCTL set_param fail_loc=0x80000525
+       $LFS mkdir -i 1 -c2 $remote_dir &
+       mkdir_pid=$!
+       sleep 3
+       # initiate the re-connect & re-send
+       mdccli=$(do_facet mds2 $LCTL dl |
+               awk '/MDT0000-osp-MDT0001/ {print $4;}')
+       uuid=$(do_facet mds2 $LCTL get_param -n osp.$mdccli.mdt_conn_uuid)
+       echo "conn_uuid=$uuid"
+       do_facet mds2 $LCTL set_param "osp.$mdccli.import=connection=$uuid"
+
+       wait $mkdir_pid
+       (( $? == 0 )) || error "mkdir failed"
+
+       diridx=$($LFS getstripe -m $remote_dir)
+       (( $diridx == 1 )) || error "$diridx != 1"
+       rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 110m "update resent vs original RPC race"
+
 # LU-2844 mdt prepare fail should not cause umount oops
 test_111 ()
 {