* processing, so we needs to allow lw_client to be connected at
* anytime, instead of only the initial connection
*/
- lw_client = (data->ocd_connect_flags & OBD_CONNECT_LIGHTWEIGHT) != 0;
+ lw_client = OCD_HAS_FLAG(data, LIGHTWEIGHT);
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) {
initial_conn = true;
- mds_conn = (data->ocd_connect_flags & OBD_CONNECT_MDS) != 0;
- mds_mds_conn = (data->ocd_connect_flags &
- OBD_CONNECT_MDS_MDS) != 0;
+ mds_conn = OCD_HAS_FLAG(data, MDS);
+ mds_mds_conn = OCD_HAS_FLAG(data, MDS_MDS);
/*
* OBD_CONNECT_MNE_SWAB is removed at 2.14
export = NULL;
rc = -EALREADY;
} else if ((mds_conn || (lw_client && initial_conn) ||
- data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) &&
- export->exp_connection != NULL) {
+ OCD_HAS_FLAG(data, MDS_MDS)) && export->exp_connection) {
spin_unlock(&export->exp_lock);
if (req->rq_peer.nid != export->exp_connection->c_peer.nid) {
/* MDS or LWP reconnected after failover. */
LCONSOLE_WARN("%s: Received %s connection from %s, removing former export from %s\n",
target->obd_name,
- mds_conn ? "MDS" : "LWP",
+ lw_client ? "LWP" : "MDS",
libcfs_nid2str(req->rq_peer.nid),
libcfs_nid2str(export->exp_connection->c_peer.nid));
} else {
- /* New MDS connection from the same NID. */
- LCONSOLE_WARN("%s: Received new %s connection from %s, removing former export from same NID\n",
+ /* New connection from the same NID. */
+ LCONSOLE_WARN("%s: Received new %s connection from %s, %s former export from same NID\n",
target->obd_name,
- mds_conn ? "MDS" : "LWP",
- libcfs_nid2str(req->rq_peer.nid));
+ lw_client ? "LWP" : "MDS",
+ libcfs_nid2str(req->rq_peer.nid),
+ OCD_HAS_FLAG(data, MDS_MDS) ?
+ "keep" : "remove");
}
if (req->rq_peer.nid == export->exp_connection->c_peer.nid &&
- data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) {
+ OCD_HAS_FLAG(data, MDS_MDS)) {
/*
* Because exports between MDTs will always be
* kept, let's do not fail such export if they
return NULL;
/*
- * bulk request are aborted upon reconnect, don't try to
- * find a match
- */
- if (req->rq_bulk_write || req->rq_bulk_read)
- return NULL;
-
- /*
* This list should not be longer than max_requests in
* flights on the client, so it is not all that long.
* Also we only hit this codepath in case of a resent
struct object_update_reply *reply,
int index)
{
- CDEBUG(D_INFO, "%s: fork reply reply %p index %d: rc = %d\n",
+ CDEBUG(D_HA, "%s: fork reply reply %p index %d: rc = %d\n",
dt_obd_name(dt), reply, index, 0);
object_update_result_insert(reply, NULL, 0, index, 0);
struct object_update_reply *reply,
int index);
-static inline int out_check_resent(const struct lu_env *env,
- struct dt_device *dt,
- struct dt_object *obj,
- struct ptlrpc_request *req,
- out_reconstruct_t reconstruct,
- struct object_update_reply *reply,
- int index)
+static inline bool out_check_resent(struct ptlrpc_request *req)
{
if (likely(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)))
- return 0;
+ return false;
if (req_xid_is_last(req)) {
struct lsd_client_data *lcd;
lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
lustre_msg_set_status(req->rq_repmsg, req->rq_status);
- DEBUG_REQ(D_RPCTRACE, req, "restoring resent RPC");
-
- reconstruct(env, dt, obj, reply, index);
- return 1;
+ DEBUG_REQ(D_HA, req, "reconstruct resent RPC");
+ return true;
}
- DEBUG_REQ(D_HA, req, "no reply for RESENT req (have %lld)",
- req->rq_export->exp_target_data.ted_lcd->lcd_last_xid);
- return 0;
+ DEBUG_REQ(D_HA, req, "reprocess RESENT req, last_xid is %lld",
+ req->rq_export->exp_target_data.ted_lcd->lcd_last_xid);
+ return false;
}
static int out_create(struct tgt_session_info *tsi)
int rc1 = 0;
int ouh_size, reply_size;
int updates;
+ bool need_reconstruct;
+
ENTRY;
req_capsule_set(pill, &RQF_OUT_UPDATE);
tti->tti_u.update.tti_update_reply = reply;
tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));
+ need_reconstruct = out_check_resent(pill->rc_req);
+
/* Walk through updates in the request to execute them */
for (i = 0; i < update_buf_count; i++) {
struct tgt_handler *h;
/* Check resend case only for modifying RPC */
if (h->th_flags & IS_MUTABLE) {
- struct ptlrpc_request *req = tgt_ses_req(tsi);
+ /* sanity check for last XID changing */
+ if (unlikely(!need_reconstruct &&
+ req_xid_is_last(pill->rc_req))) {
+ DEBUG_REQ(D_ERROR, pill->rc_req,
+ "unexpected last XID change");
+ GOTO(next, rc = -EINVAL);
+ }
- if (out_check_resent(env, dt, dt_obj, req,
- out_reconstruct, reply,
- reply_index))
+ if (need_reconstruct) {
+ out_reconstruct(env, dt, dt_obj, reply,
+ reply_index);
GOTO(next, rc = 0);
+ }
if (dt->dd_rdonly)
GOTO(next, rc = -EROFS);
/* start transaction for modification RPC only */
if (h->th_flags & IS_MUTABLE && current_batchid == -1) {
current_batchid = update->ou_batchid;
+
+ if (reply_index == 0)
+ CFS_RACE(OBD_FAIL_PTLRPC_RESEND_RACE);
+
rc = out_tx_start(env, dt, ta, tsi->tsi_exp);
if (rc != 0)
GOTO(next, rc);
}
run_test 110k "FID_QUERY failed during recovery"
+test_110m () {
+ (( $(lustre_version_code $SINGLEMDS) >= $(version_code 2.14.52) )) ||
+ skip "Need MDS version at least 2.14.52"
+ (( $MDSCOUNT >= 2 )) || skip "needs at least 2 MDTs"
+ local remote_dir=$DIR/$tdir/remote_dir
+ local mdccli
+ local uuid
+ local diridx
+
+ mkdir_on_mdt0 $DIR/$tdir
+
+#define OBD_FAIL_PTLRPC_RESEND_RACE 0x0525
+ do_facet mds1 $LCTL set_param fail_loc=0x80000525
+ $LFS mkdir -i 1 -c2 $remote_dir &
+ mkdir_pid=$!
+ sleep 3
+ # initiate the re-connect & re-send
+ mdccli=$(do_facet mds2 $LCTL dl |
+ awk '/MDT0000-osp-MDT0001/ {print $4;}')
+ uuid=$(do_facet mds2 $LCTL get_param -n osp.$mdccli.mdt_conn_uuid)
+ echo "conn_uuid=$uuid"
+ do_facet mds2 $LCTL set_param "osp.$mdccli.import=connection=$uuid"
+
+ wait $mkdir_pid
+ (( $? == 0 )) || error "mkdir failed"
+
+ diridx=$($LFS getstripe -m $remote_dir)
+ (( $diridx == 1 )) || error "$diridx != 1"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 110m "update resent vs original RPC race"
+
# LU-2844 mdt prepare fail should not cause umount oops
test_111 ()
{