X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fldlm%2Fldlm_lib.c;h=1d4d6b78ff5c85509585fdfa0d9a9d75285324f8;hp=b92c1de9f29102141f881dec8be6af1a5833c787;hb=892078e3b566c04471e7dcf2c28e66f2f3584f93;hpb=f6995cf04407dff15d6ca79ca44cfa97dc6eb014

diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index b92c1de..1d4d6b7 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -260,17 +260,18 @@ static int osc_on_mdt(char *obdname)
  */
 int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 {
-        struct client_obd *cli = &obddev->u.cli;
-        struct obd_import *imp;
-        struct obd_uuid server_uuid;
-        int rq_portal, rp_portal, connect_op;
-        char *name = obddev->obd_type->typ_name;
-        ldlm_ns_type_t ns_type = LDLM_NS_TYPE_UNKNOWN;
-        int rc;
-        ENTRY;
+	struct client_obd *cli = &obddev->u.cli;
+	struct obd_import *imp;
+	struct obd_uuid server_uuid;
+	int rq_portal, rp_portal, connect_op;
+	char *name = obddev->obd_type->typ_name;
+	enum ldlm_ns_type ns_type = LDLM_NS_TYPE_UNKNOWN;
+	char *cli_name = lustre_cfg_buf(lcfg, 0);
+	int rc;
+	ENTRY;
 
-        /* In a more perfect world, we would hang a ptlrpc_client off of
-         * obd_type and just use the values from there. */
+	/* In a more perfect world, we would hang a ptlrpc_client off of
+	 * obd_type and just use the values from there. */
 	if (!strcmp(name, LUSTRE_OSC_NAME)) {
 		rq_portal = OST_REQUEST_PORTAL;
 		rp_portal = OSC_REPLY_PORTAL;
@@ -283,7 +284,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 		rq_portal = MDS_REQUEST_PORTAL;
 		rp_portal = MDC_REPLY_PORTAL;
 		connect_op = MDS_CONNECT;
-		cli->cl_sp_me = LUSTRE_SP_CLI;
+		if (is_lwp_on_ost(cli_name))
+			cli->cl_sp_me = LUSTRE_SP_OST;
+		else if (is_lwp_on_mdt(cli_name))
+			cli->cl_sp_me = LUSTRE_SP_MDT;
+		else
+			cli->cl_sp_me = LUSTRE_SP_CLI;
 		cli->cl_sp_to = LUSTRE_SP_MDT;
 		ns_type = LDLM_NS_TYPE_MDC;
 	} else if (!strcmp(name, LUSTRE_OSP_NAME)) {
@@ -301,7 +307,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 			rq_portal = OST_REQUEST_PORTAL;
 		}
 		rp_portal = OSC_REPLY_PORTAL;
-		cli->cl_sp_me = LUSTRE_SP_CLI;
+		cli->cl_sp_me = LUSTRE_SP_MDT;
         } else if (!strcmp(name, LUSTRE_MGC_NAME)) {
                 rq_portal = MGS_REQUEST_PORTAL;
                 rp_portal = MGC_REPLY_PORTAL;
@@ -415,6 +421,23 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 		else
 			cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
         }
+
+	spin_lock_init(&cli->cl_mod_rpcs_lock);
+	spin_lock_init(&cli->cl_mod_rpcs_hist.oh_lock);
+	cli->cl_max_mod_rpcs_in_flight = 0;
+	cli->cl_mod_rpcs_in_flight = 0;
+	cli->cl_close_rpcs_in_flight = 0;
+	init_waitqueue_head(&cli->cl_mod_rpcs_waitq);
+	cli->cl_mod_tag_bitmap = NULL;
+
+	if (connect_op == MDS_CONNECT) {
+		cli->cl_max_mod_rpcs_in_flight = cli->cl_max_rpcs_in_flight - 1;
+		OBD_ALLOC(cli->cl_mod_tag_bitmap,
+			  BITS_TO_LONGS(OBD_MAX_RIF_MAX) * sizeof(long));
+		if (cli->cl_mod_tag_bitmap == NULL)
+			GOTO(err, rc = -ENOMEM);
+	}
+
         rc = ldlm_get_ref();
         if (rc) {
                 CERROR("ldlm_get_ref failed: %d\n", rc);
@@ -439,10 +462,9 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
                 GOTO(err_import, rc);
         }
 
-        cli->cl_import = imp;
-        /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */
-        cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3);
-        cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
+	cli->cl_import = imp;
+	/* cli->cl_max_mds_easize updated by mdc_init_ea_size() */
+	cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3);
 
         if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
                 if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) {
@@ -465,8 +487,6 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
                 GOTO(err_import, rc = -ENOMEM);
         }
 
-        cli->cl_qchk_stat = CL_NOT_QUOTACHECKED;
-
         RETURN(rc);
 
 err_import:
@@ -474,6 +494,10 @@ err_import:
 err_ldlm:
         ldlm_put_ref();
 err:
+	if (cli->cl_mod_tag_bitmap != NULL)
+		OBD_FREE(cli->cl_mod_tag_bitmap,
+			 BITS_TO_LONGS(OBD_MAX_RIF_MAX) * sizeof(long));
+	cli->cl_mod_tag_bitmap = NULL;
         RETURN(rc);
 
 }
@@ -481,6 +505,7 @@ EXPORT_SYMBOL(client_obd_setup);
 
 int client_obd_cleanup(struct obd_device *obddev)
 {
+	struct client_obd *cli = &obddev->u.cli;
 	ENTRY;
 
 	ldlm_namespace_free_post(obddev->obd_namespace);
@@ -490,6 +515,12 @@ int client_obd_cleanup(struct obd_device *obddev)
 	LASSERT(obddev->u.cli.cl_import == NULL);
 
 	ldlm_put_ref();
+
+	if (cli->cl_mod_tag_bitmap != NULL)
+		OBD_FREE(cli->cl_mod_tag_bitmap,
+			 BITS_TO_LONGS(OBD_MAX_RIF_MAX) * sizeof(long));
+	cli->cl_mod_tag_bitmap = NULL;
+
 	RETURN(0);
 }
 EXPORT_SYMBOL(client_obd_cleanup);
@@ -505,6 +536,7 @@ int client_connect_import(const struct lu_env *env,
 	struct obd_connect_data *ocd;
 	struct lustre_handle    conn    = { 0 };
 	int                     rc;
+	bool			is_mdc = false;
 	ENTRY;
 
         *exp = NULL;
@@ -529,6 +561,10 @@ int client_connect_import(const struct lu_env *env,
         ocd = &imp->imp_connect_data;
         if (data) {
                 *ocd = *data;
+		is_mdc = strncmp(imp->imp_obd->obd_type->typ_name,
+				 LUSTRE_MDC_NAME, 3) == 0;
+		if (is_mdc)
+			data->ocd_connect_flags |= OBD_CONNECT_MULTIMODRPCS;
                 imp->imp_connect_flags_orig = data->ocd_connect_flags;
         }
 
@@ -544,6 +580,10 @@ int client_connect_import(const struct lu_env *env,
                          ocd->ocd_connect_flags, "old "LPX64", new "LPX64"\n",
                          data->ocd_connect_flags, ocd->ocd_connect_flags);
                 data->ocd_connect_flags = ocd->ocd_connect_flags;
+		/* clear the flag as it was not set and is not known
+		 * by upper layers */
+		if (is_mdc)
+			data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS;
         }
 
         ptlrpc_pinger_add_import(imp);
@@ -648,8 +688,7 @@ int server_disconnect_export(struct obd_export *exp)
 	if (exp->exp_imp_reverse)
 		ptlrpc_cleanup_imp(exp->exp_imp_reverse);
 
-	if (exp->exp_obd->obd_namespace != NULL)
-		ldlm_cancel_locks_for_export(exp);
+	ldlm_bl_thread_wakeup();
 
         /* complete all outstanding replies */
 	spin_lock(&exp->exp_lock);
@@ -682,13 +721,13 @@ static int target_handle_reconnect(struct lustre_handle *conn,
                                    struct obd_export *exp,
                                    struct obd_uuid *cluuid)
 {
-        ENTRY;
+	struct lustre_handle *hdl;
+	ENTRY;
 
-        if (exp->exp_connection && exp->exp_imp_reverse) {
-                struct lustre_handle *hdl;
+	hdl = &exp->exp_imp_reverse->imp_remote_handle;
+	if (exp->exp_connection && lustre_handle_is_used(hdl)) {
                 struct obd_device *target;
 
-                hdl = &exp->exp_imp_reverse->imp_remote_handle;
                 target = exp->exp_obd;
 
                 /* Might be a re-connect after a partition. */
@@ -758,22 +797,131 @@ static void
 check_and_start_recovery_timer(struct obd_device *obd,
                                struct ptlrpc_request *req, int new_client);
 
+/**
+ * Bring \a revimp's header flags and security in line with request \a req.
+ * \retval 0 on success, else the rc of sptlrpc_import_sec_adapt()
+ */
+static int rev_import_flags_update(struct obd_import *revimp,
+				   struct ptlrpc_request *req)
+{
+	struct obd_connect_data *ocd;
+	int err;
+
+	ocd = req_capsule_client_get(&req->rq_pill, &RMF_CONNECT_DATA);
+
+	/* the AT header flag mirrors the client's OBD_CONNECT_AT flag */
+	if (!(ocd->ocd_connect_flags & OBD_CONNECT_AT))
+		revimp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+	else
+		revimp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
+	revimp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
+
+	err = sptlrpc_import_sec_adapt(revimp, req->rq_svc_ctx, &req->rq_flvr);
+	if (err == 0)
+		return 0;
+
+	CERROR("%s: cannot get reverse import %s security: rc = %d\n",
+	       revimp->imp_client->cli_name,
+	       libcfs_id2str(req->rq_peer), err);
+	return err;
+}
+
+/**
+ * Allocate a new reverse import for \a export, which must not have one yet
+ * (asserted), and publish it in exp_imp_reverse under exp_lock.
+ *
+ * \retval 0 on success, -ENOMEM if class_new_import() returns NULL
+ **/
+int rev_import_init(struct obd_export *export)
+{
+	struct obd_device *obd = export->exp_obd;
+	struct obd_import *revimp;
+
+	LASSERT(export->exp_imp_reverse == NULL);
+
+	revimp = class_new_import(obd);
+	if (revimp == NULL)
+		return -ENOMEM;
+	/* zero handle; rev_import_reconnect() later copies the client's */
+	revimp->imp_remote_handle.cookie = 0ULL;
+	revimp->imp_client = &obd->obd_ldlm_client;
+	revimp->imp_dlm_fake = 1;
+
+	/* safe to attach the import in its new state: no sends are possible */
+	spin_lock(&export->exp_lock);
+	export->exp_imp_reverse = revimp;
+	spin_unlock(&export->exp_lock);
+	class_import_put(revimp); /* NOTE(review): drops our ref? confirm */
+
+	return 0;
+}
+EXPORT_SYMBOL(rev_import_init);
+
+/**
+ * Rebind the reverse import of \a exp to the connection used by \a req.
+ *
+ * \param exp export being (re)connected; its exp_imp_reverse is updated
+ * \param req client connect request supplying handle, magic and flags
+ *
+ * \retval 0 on success
+ * \retval rc from rev_import_flags_update() or the recovery state machine
+ */
+static int rev_import_reconnect(struct obd_export *exp,
+				struct ptlrpc_request *req)
+{
+	struct obd_import *revimp = exp->exp_imp_reverse;
+	struct lustre_handle *lh;
+	int rc;
+
+	/* avoid sending a request until import flags are changed */
+	ptlrpc_import_enter_resend(revimp);
+
+	if (revimp->imp_connection != NULL)
+		ptlrpc_connection_put(revimp->imp_connection);
+
+	/*
+	 * A client coming from recovery has no handle, so take it from the
+	 * request.  Because the client UUID is trusted, this may let the
+	 * wrong client connect during recovery.
+	 */
+	lh = req_capsule_client_get(&req->rq_pill, &RMF_CONN);
+	revimp->imp_remote_handle = *lh;
+
+	/* unknown versions will be caught in
+	 * ptlrpc_handle_server_req_in->lustre_unpack_msg() */
+	revimp->imp_msg_magic = req->rq_reqmsg->lm_magic;
+
+	revimp->imp_connection = ptlrpc_connection_addref(exp->exp_connection);
+
+	rc = rev_import_flags_update(revimp, req);
+	if (rc != 0) {
+		/* It is safe to stay in the RECOVERY phase: without a correct
+		 * security flavor, requests could not be delivered correctly
+		 * anyway. */
+		return rc;
+	}
+
+	/* resend all RPCs via the new connection */
+	return ptlrpc_import_recovery_state_machine(revimp);
+}
+
 int target_handle_connect(struct ptlrpc_request *req)
 {
-	struct obd_device *target = NULL, *targref = NULL;
-        struct obd_export *export = NULL;
-        struct obd_import *revimp;
-	struct obd_import *tmp_imp = NULL;
-        struct lustre_handle conn;
-        struct lustre_handle *tmp;
+	struct obd_device *target = NULL;
+	struct obd_export *export = NULL;
+	/* connect handle - filled from target_handle_reconnect in
+	 * reconnect case */
+	struct lustre_handle conn;
+	struct lustre_handle *tmp;
         struct obd_uuid tgtuuid;
         struct obd_uuid cluuid;
-        struct obd_uuid remote_uuid;
         char *str;
         int rc = 0;
         char *target_start;
         int target_len;
 	bool	 mds_conn = false, lw_client = false;
+	bool	 mds_mds_conn = false;
+	bool	 new_mds_mds_conn = false;
         struct obd_connect_data *data, *tmpdata;
         int size, tmpsize;
         lnet_nid_t *client_nid = NULL;
@@ -803,6 +951,11 @@ int target_handle_connect(struct ptlrpc_request *req)
 	}
 
 	spin_lock(&target->obd_dev_lock);
+	/* Make sure the target isn't cleaned up while we're here. Yes,
+	 * there's still a race between the above check and our incref here.
+	 * Really, class_uuid2obd should take the ref. */
+	class_incref(target, __func__, current);
+
 	if (target->obd_stopping || !target->obd_set_up) {
 		spin_unlock(&target->obd_dev_lock);
 
@@ -824,11 +977,6 @@ int target_handle_connect(struct ptlrpc_request *req)
 		GOTO(out, rc = -EAGAIN);
 	}
 
-	/* Make sure the target isn't cleaned up while we're here. Yes,
-	 * there's still a race between the above check and our incref here.
-	 * Really, class_uuid2obd should take the ref. */
-	targref = class_incref(target, __FUNCTION__, current);
-
 	target->obd_conn_inprogress++;
 	spin_unlock(&target->obd_dev_lock);
 
@@ -840,21 +988,6 @@ int target_handle_connect(struct ptlrpc_request *req)
 
         obd_str2uuid(&cluuid, str);
 
-	/* XXX Extract a nettype and format accordingly. */
-	switch (sizeof(lnet_nid_t)) {
-	/* NB the casts only avoid compiler warnings. */
-        case 8:
-                snprintf(remote_uuid.uuid, sizeof remote_uuid,
-                         "NET_"LPX64"_UUID", (__u64)req->rq_peer.nid);
-                break;
-        case 4:
-                snprintf(remote_uuid.uuid, sizeof remote_uuid,
-                         "NET_%x_UUID", (__u32)req->rq_peer.nid);
-                break;
-        default:
-                LBUG();
-        }
-
         tmp = req_capsule_client_get(&req->rq_pill, &RMF_CONN);
         if (tmp == NULL)
                 GOTO(out, rc = -EPROTO);
@@ -871,6 +1004,20 @@ int target_handle_connect(struct ptlrpc_request *req)
         if (rc)
                 GOTO(out, rc);
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
+	/* Don't allow clients to connect that are using old 1.8 format
+	 * protocol conventions (LUSTRE_MSG_MAGIC_v1, !MSGHDR_CKSUM_INCOMPAT18,
+	 * ldlm_flock_policy_wire format, MDT_ATTR_xTIME_SET, etc).  The
+	 * FULL20 flag should be set on all connections since 2.0, but no
+	 * longer affects behaviour.
+	 *
+	 * Later this check will be disabled and the flag can be retired
+	 * completely once interop with 3.0 is no longer needed.
+	 */
+	if (!(data->ocd_connect_flags & OBD_CONNECT_FULL20))
+		GOTO(out, rc = -EPROTO);
+#endif
+
 	if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
 		if (data->ocd_version < LUSTRE_VERSION_CODE -
 		                               LUSTRE_VERSION_ALLOWED_OFFSET ||
@@ -896,10 +1043,15 @@ int target_handle_connect(struct ptlrpc_request *req)
 		}
 	}
 
+	/* Note: lw_client is needed in MDS-MDS failover during update log
+	 * processing, so we need to allow lw_client to connect at any
+	 * time, instead of only on the initial connection */
+	lw_client = (data->ocd_connect_flags & OBD_CONNECT_LIGHTWEIGHT) != 0;
+
 	if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) {
 		mds_conn = (data->ocd_connect_flags & OBD_CONNECT_MDS) != 0;
-		lw_client = (data->ocd_connect_flags &
-			     OBD_CONNECT_LIGHTWEIGHT) != 0;
+		mds_mds_conn = (data->ocd_connect_flags &
+				OBD_CONNECT_MDS_MDS) != 0;
 
 		/* OBD_CONNECT_MNE_SWAB is defined as OBD_CONNECT_MDS_MDS
 		 * for Imperative Recovery connection from MGC to MGS.
@@ -1051,7 +1203,9 @@ no_export:
 
         if (export == NULL) {
 		/* allow lightweight connections during recovery */
-		if (target->obd_recovering && !lw_client) {
+		/* allow "new" MDT to be connected during recovery, since we
+		 * need to retrieve recovery update records from it */
+		if (target->obd_recovering && !lw_client && !mds_mds_conn) {
                         cfs_time_t t;
 			int	c; /* connected */
 			int	i; /* in progress */
@@ -1065,30 +1219,35 @@ no_export:
 			t = cfs_timer_deadline(&target->obd_recovery_timer);
 			t = cfs_time_sub(t, cfs_time_current());
 			t = cfs_duration_sec(t);
-			LCONSOLE_WARN("%s: Denying connection for new client "
-				      "%s (at %s), waiting for all %d known "
-				      "clients (%d recovered, %d in progress, "
-				      "and %d evicted) to recover in %d:%.02d\n",
+			LCONSOLE_WARN("%s: Denying connection for new client %s"
+				      " (at %s), waiting for %d known clients "
+				      "(%d recovered, %d in progress, and %d "
+				      "evicted) to recover in %d:%.02d\n",
 				      target->obd_name, cluuid.uuid,
 				      libcfs_nid2str(req->rq_peer.nid), k,
 				      c - i, i, s, (int)t / 60,
 				      (int)t % 60);
-                        rc = -EBUSY;
-                } else {
+			rc = -EBUSY;
+		} else {
 dont_check_exports:
-                        rc = obd_connect(req->rq_svc_thread->t_env,
-                                         &export, target, &cluuid, data,
-                                         client_nid);
+			rc = obd_connect(req->rq_svc_thread->t_env,
+					 &export, target, &cluuid, data,
+					 client_nid);
 			if (mds_conn && OBD_FAIL_CHECK(OBD_FAIL_TGT_RCVG_FLAG))
 				lustre_msg_add_op_flags(req->rq_repmsg,
-						MSG_CONNECT_RECOVERING);
-                        if (rc == 0)
-                                conn.cookie = export->exp_handle.h_cookie;
-                }
-        } else {
-                rc = obd_reconnect(req->rq_svc_thread->t_env,
-                                   export, target, &cluuid, data, client_nid);
-        }
+							MSG_CONNECT_RECOVERING);
+			if (rc == 0) {
+				conn.cookie = export->exp_handle.h_cookie;
+				rc = rev_import_init(export);
+			}
+
+			if (mds_mds_conn)
+				new_mds_mds_conn = true;
+		}
+	} else {
+		rc = obd_reconnect(req->rq_svc_thread->t_env,
+				   export, target, &cluuid, data, client_nid);
+	}
 	if (rc)
 		GOTO(out, rc);
 
@@ -1108,11 +1267,6 @@ dont_check_exports:
                 memcpy(tmpdata, data, min(tmpsize, size));
         }
 
-        /* If all else goes well, this is our RPC return code. */
-        req->rq_status = 0;
-
-        lustre_msg_set_handle(req->rq_repmsg, &conn);
-
         /* If the client and the server are the same node, we will already
          * have an export that really points to the client's DLM export,
          * because we have a shared handles table.
@@ -1159,14 +1313,19 @@ dont_check_exports:
                 ptlrpc_connection_put(export->exp_connection);
         }
 
-        export->exp_connection = ptlrpc_connection_get(req->rq_peer,
-                                                       req->rq_self,
-                                                       &remote_uuid);
-	if (hlist_unhashed(&export->exp_nid_hash)) {
-                cfs_hash_add(export->exp_obd->obd_nid_hash,
-                             &export->exp_connection->c_peer.nid,
-                             &export->exp_nid_hash);
-        }
+	export->exp_connection = ptlrpc_connection_get(req->rq_peer,
+						       req->rq_self,
+						       &cluuid);
+	if (hlist_unhashed(&export->exp_nid_hash))
+		cfs_hash_add(export->exp_obd->obd_nid_hash,
+			     &export->exp_connection->c_peer.nid,
+			     &export->exp_nid_hash);
+
+	lustre_msg_set_handle(req->rq_repmsg, &conn);
+
+	rc = rev_import_reconnect(export, req);
+	if (rc != 0)
+		GOTO(out, rc);
 
 	if (target->obd_recovering && !export->exp_in_recovery && !lw_client) {
                 int has_transno;
@@ -1201,6 +1360,14 @@ dont_check_exports:
 
 		atomic_inc(&target->obd_req_replay_clients);
 		atomic_inc(&target->obd_lock_replay_clients);
+		/* Note: an MDS-MDS connection is allowed to connect during
+		 * recovery, whether or not the export needs to be recovered,
+		 * because we need to retrieve update logs from all other MDTs.
+		 * So if the MDS-MDS export is new, obd_max_recoverable_clients
+		 * also needs to be increased to match the other recovery
+		 * checking conditions. */
+		if (new_mds_mds_conn)
+			target->obd_max_recoverable_clients++;
 		if (atomic_inc_return(&target->obd_connected_clients) ==
 		    target->obd_max_recoverable_clients)
 			wake_up(&target->obd_next_transno_waitq);
@@ -1210,59 +1377,7 @@ dont_check_exports:
 	if (target->obd_recovering && !lw_client)
                 lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
 
-        tmp = req_capsule_client_get(&req->rq_pill, &RMF_CONN);
-        conn = *tmp;
-
-	/* Return -ENOTCONN in case of errors to let client reconnect. */
-	revimp = class_new_import(target);
-	if (revimp == NULL) {
-		CERROR("fail to alloc new reverse import.\n");
-		GOTO(out, rc = -ENOTCONN);
-	}
-
-	spin_lock(&export->exp_lock);
-	if (export->exp_imp_reverse != NULL)
-		/* destroyed import can be still referenced in ctxt */
-		tmp_imp = export->exp_imp_reverse;
-	export->exp_imp_reverse = revimp;
-	spin_unlock(&export->exp_lock);
-
-        revimp->imp_connection = ptlrpc_connection_addref(export->exp_connection);
-        revimp->imp_client = &export->exp_obd->obd_ldlm_client;
-        revimp->imp_remote_handle = conn;
-        revimp->imp_dlm_fake = 1;
-        revimp->imp_state = LUSTRE_IMP_FULL;
-
-	/* Unknown versions will be caught in
-	 * ptlrpc_handle_server_req_in->lustre_unpack_msg(). */
-        revimp->imp_msg_magic = req->rq_reqmsg->lm_magic;
-
-	if ((data->ocd_connect_flags & OBD_CONNECT_AT) &&
-	    (revimp->imp_msg_magic != LUSTRE_MSG_MAGIC_V1))
-		revimp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
-	else
-		revimp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
-
-	if ((data->ocd_connect_flags & OBD_CONNECT_FULL20) &&
-            (revimp->imp_msg_magic != LUSTRE_MSG_MAGIC_V1))
-                revimp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
-        else
-                revimp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
-
-	rc = sptlrpc_import_sec_adapt(revimp, req->rq_svc_ctx, &req->rq_flvr);
-	if (rc) {
-		CERROR("Failed to get sec for reverse import: %d\n", rc);
-		spin_lock(&export->exp_lock);
-		export->exp_imp_reverse = NULL;
-		spin_unlock(&export->exp_lock);
-		class_destroy_import(revimp);
-	}
-
-	class_import_put(revimp);
-
 out:
-	if (tmp_imp != NULL)
-		client_destroy_import(tmp_imp);
 	if (export) {
 		spin_lock(&export->exp_lock);
 		export->exp_connecting = 0;
@@ -1270,15 +1385,14 @@ out:
 
 		class_export_put(export);
 	}
-	if (targref) {
+	if (target != NULL) {
 		spin_lock(&target->obd_dev_lock);
 		target->obd_conn_inprogress--;
 		spin_unlock(&target->obd_dev_lock);
 
-		class_decref(targref, __func__, current);
+		class_decref(target, __func__, current);
 	}
-	if (rc)
-		req->rq_status = rc;
+	req->rq_status = rc;
 	RETURN(rc);
 }
 
@@ -1348,6 +1462,7 @@ static int target_exp_enqueue_req_replay(struct ptlrpc_request *req)
         __u64                  transno = lustre_msg_get_transno(req->rq_reqmsg);
         struct obd_export     *exp = req->rq_export;
         struct ptlrpc_request *reqiter;
+	struct ptlrpc_request *dup_req = NULL;
         int                    dup = 0;
 
         LASSERT(exp);
@@ -1356,6 +1471,7 @@ static int target_exp_enqueue_req_replay(struct ptlrpc_request *req)
 	list_for_each_entry(reqiter, &exp->exp_req_replay_queue,
                                 rq_replay_list) {
                 if (lustre_msg_get_transno(reqiter->rq_reqmsg) == transno) {
+			dup_req = reqiter;
                         dup = 1;
                         break;
                 }
@@ -1367,6 +1483,16 @@ static int target_exp_enqueue_req_replay(struct ptlrpc_request *req)
                      (MSG_RESENT | MSG_REPLAY)) != (MSG_RESENT | MSG_REPLAY))
                         CERROR("invalid flags %x of resent replay\n",
                                lustre_msg_get_flags(req->rq_reqmsg));
+
+		if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) {
+			__u32 new_conn;
+
+			new_conn = lustre_msg_get_conn_cnt(req->rq_reqmsg);
+			if (new_conn >
+			    lustre_msg_get_conn_cnt(dup_req->rq_reqmsg))
+				lustre_msg_set_conn_cnt(dup_req->rq_reqmsg,
+							new_conn);
+		}
         } else {
 		list_add_tail(&req->rq_replay_list,
                                   &exp->exp_req_replay_queue);
@@ -1386,8 +1512,9 @@ static void target_exp_dequeue_req_replay(struct ptlrpc_request *req)
 	spin_unlock(&req->rq_export->exp_lock);
 }
 
-static void target_finish_recovery(struct obd_device *obd)
+static void target_finish_recovery(struct lu_target *lut)
 {
+	struct obd_device *obd = lut->lut_obd;
         ENTRY;
 
 	/* Only log a recovery message when recovery has occurred. */
@@ -1420,6 +1547,13 @@ static void target_finish_recovery(struct obd_device *obd)
 	}
 	spin_unlock(&obd->obd_recovery_task_lock);
 
+	if (lut->lut_tdtd != NULL &&
+	    (!list_empty(&lut->lut_tdtd->tdtd_replay_list) ||
+	    !list_empty(&lut->lut_tdtd->tdtd_replay_finish_list))) {
+		dtrq_list_dump(lut->lut_tdtd, D_ERROR);
+		dtrq_list_destroy(lut->lut_tdtd);
+	}
+
         obd->obd_recovery_end = cfs_time_current_sec();
 
 	/* When recovery finished, cleanup orphans on MDS and OST. */
@@ -1486,7 +1620,6 @@ void target_cleanup_recovery(struct obd_device *obd)
 {
         struct ptlrpc_request *req, *n;
 	struct list_head clean_list;
-        ENTRY;
 
 	INIT_LIST_HEAD(&clean_list);
 	spin_lock(&obd->obd_dev_lock);
@@ -1496,6 +1629,7 @@ void target_cleanup_recovery(struct obd_device *obd)
 		return;
 	}
 	obd->obd_recovering = obd->obd_abort_recovery = 0;
+	obd->obd_force_abort_recovery = 0;
 	spin_unlock(&obd->obd_dev_lock);
 
 	spin_lock(&obd->obd_recovery_task_lock);
@@ -1536,7 +1670,8 @@ static void target_start_recovery_timer(struct obd_device *obd)
 		return;
 
 	spin_lock(&obd->obd_dev_lock);
-	if (!obd->obd_recovering || obd->obd_abort_recovery) {
+	if (!obd->obd_recovering || obd->obd_abort_recovery ||
+	    obd->obd_force_abort_recovery) {
 		spin_unlock(&obd->obd_dev_lock);
 		return;
 	}
@@ -1577,7 +1712,8 @@ static void extend_recovery_timer(struct obd_device *obd, int drt, bool extend)
 	int to;
 
 	spin_lock(&obd->obd_dev_lock);
-	if (!obd->obd_recovering || obd->obd_abort_recovery) {
+	if (!obd->obd_recovering || obd->obd_abort_recovery ||
+	    obd->obd_force_abort_recovery) {
 		spin_unlock(&obd->obd_dev_lock);
                 return;
         }
@@ -1679,23 +1815,15 @@ static inline int exp_finished(struct obd_export *exp)
         return (exp->exp_in_recovery && !exp->exp_lock_replay_needed);
 }
 
-/** Checking routines for recovery */
-static int check_for_clients(struct obd_device *obd)
-{
-	unsigned int clnts = atomic_read(&obd->obd_connected_clients);
-
-	if (obd->obd_abort_recovery || obd->obd_recovery_expired)
-		return 1;
-	LASSERT(clnts <= obd->obd_max_recoverable_clients);
-	return (clnts + obd->obd_stale_clients ==
-		obd->obd_max_recoverable_clients);
-}
-
-static int check_for_next_transno(struct obd_device *obd)
+static int check_for_next_transno(struct lu_target *lut)
 {
 	struct ptlrpc_request *req = NULL;
+	struct obd_device *obd = lut->lut_obd;
+	struct target_distribute_txn_data *tdtd = lut->lut_tdtd;
 	int wake_up = 0, connected, completed, queue_len;
-	__u64 next_transno, req_transno;
+	__u64 req_transno = 0;
+	__u64 update_transno = 0;
+	__u64 next_transno = 0;
 	ENTRY;
 
 	spin_lock(&obd->obd_recovery_task_lock);
@@ -1703,10 +1831,11 @@ static int check_for_next_transno(struct obd_device *obd)
 		req = list_entry(obd->obd_req_replay_queue.next,
 				     struct ptlrpc_request, rq_list);
 		req_transno = lustre_msg_get_transno(req->rq_reqmsg);
-	} else {
-		req_transno = 0;
 	}
 
+	if (tdtd != NULL)
+		update_transno = distribute_txn_get_next_transno(tdtd);
+
 	connected = atomic_read(&obd->obd_connected_clients);
 	completed = connected - atomic_read(&obd->obd_req_replay_clients);
 	queue_len = obd->obd_requests_queued_for_recovery;
@@ -1717,13 +1846,21 @@ static int check_for_next_transno(struct obd_device *obd)
 	       obd->obd_max_recoverable_clients, connected, completed,
 	       queue_len, req_transno, next_transno);
 
-	if (obd->obd_abort_recovery) {
+	if (obd->obd_abort_recovery || obd->obd_force_abort_recovery) {
 		CDEBUG(D_HA, "waking for aborted recovery\n");
 		wake_up = 1;
 	} else if (obd->obd_recovery_expired) {
 		CDEBUG(D_HA, "waking for expired recovery\n");
 		wake_up = 1;
-	} else if (req_transno == next_transno) {
+	} else if (tdtd != NULL && req != NULL &&
+		   is_req_replayed_by_update(req)) {
+		LASSERTF(req_transno < next_transno, "req_transno "LPU64
+			 "next_transno"LPU64"\n", req_transno, next_transno);
+		CDEBUG(D_HA, "waking for duplicate req ("LPU64")\n",
+		       req_transno);
+		wake_up = 1;
+	} else if (req_transno == next_transno ||
+		   (update_transno != 0 && update_transno <= next_transno)) {
 		CDEBUG(D_HA, "waking for next ("LPD64")\n", next_transno);
 		wake_up = 1;
 	} else if (queue_len > 0 &&
@@ -1739,10 +1876,10 @@ static int check_for_next_transno(struct obd_device *obd)
 		CDEBUG(d_lvl,
 		       "%s: waking for gap in transno, VBR is %s (skip: "
 		       LPD64", ql: %d, comp: %d, conn: %d, next: "LPD64
-		       ", last_committed: "LPD64")\n",
+		       ", next_update "LPD64" last_committed: "LPD64")\n",
 		       obd->obd_name, obd->obd_version_recov ? "ON" : "OFF",
 		       next_transno, queue_len, completed, connected,
-		       req_transno, obd->obd_last_committed);
+		       req_transno, update_transno, obd->obd_last_committed);
 		obd->obd_next_recovery_transno = req_transno;
 		wake_up = 1;
 	} else if (atomic_read(&obd->obd_req_replay_clients) == 0) {
@@ -1758,8 +1895,9 @@ static int check_for_next_transno(struct obd_device *obd)
 	return wake_up;
 }
 
-static int check_for_next_lock(struct obd_device *obd)
+static int check_for_next_lock(struct lu_target *lut)
 {
+	struct obd_device *obd = lut->lut_obd;
 	int wake_up = 0;
 
 	spin_lock(&obd->obd_recovery_task_lock);
@@ -1769,7 +1907,7 @@ static int check_for_next_lock(struct obd_device *obd)
 	} else if (atomic_read(&obd->obd_lock_replay_clients) == 0) {
 		CDEBUG(D_HA, "waking for completed lock replay\n");
 		wake_up = 1;
-	} else if (obd->obd_abort_recovery) {
+	} else if (obd->obd_abort_recovery || obd->obd_force_abort_recovery) {
 		CDEBUG(D_HA, "waking for aborted recovery\n");
 		wake_up = 1;
 	} else if (obd->obd_recovery_expired) {
@@ -1786,10 +1924,11 @@ static int check_for_next_lock(struct obd_device *obd)
  * check its status with help of check_routine
  * evict dead clients via health_check
  */
-static int target_recovery_overseer(struct obd_device *obd,
-				    int (*check_routine)(struct obd_device *),
+static int target_recovery_overseer(struct lu_target *lut,
+				    int (*check_routine)(struct lu_target *),
 				    int (*health_check)(struct obd_export *))
 {
+	struct obd_device	*obd = lut->lut_obd;
 repeat:
 	if ((obd->obd_recovery_start != 0) && (cfs_time_current_sec() >=
 	      (obd->obd_recovery_start + obd->obd_recovery_time_hard))) {
@@ -1798,11 +1937,11 @@ repeat:
 	}
 
 	while (wait_event_timeout(obd->obd_next_transno_waitq,
-				  check_routine(obd),
+				  check_routine(lut),
 				  msecs_to_jiffies(60 * MSEC_PER_SEC)) == 0)
 		/* wait indefinitely for event, but don't trigger watchdog */;
 
-	if (obd->obd_abort_recovery) {
+	if (obd->obd_abort_recovery || obd->obd_force_abort_recovery) {
 		CWARN("recovery is aborted, evict exports in recovery\n");
 		/** evict exports which didn't finish recovery yet */
 		class_disconnect_stale_exports(obd, exp_finished);
@@ -1829,50 +1968,13 @@ repeat:
 	return 0;
 }
 
-static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd)
-{
-	struct ptlrpc_request *req = NULL;
-	ENTRY;
-
-	CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
-		obd->obd_next_recovery_transno);
-
-	CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
-	/** It is needed to extend recovery window above recovery_time_soft.
-	 *  Extending is possible only in the end of recovery window
-	 *  (see more details in handle_recovery_req).
-	 */
-	CFS_FAIL_TIMEOUT_MS(OBD_FAIL_TGT_REPLAY_DELAY, 300);
-
-	if (target_recovery_overseer(obd, check_for_next_transno,
-				     exp_req_replay_healthy)) {
-		abort_req_replay_queue(obd);
-		abort_lock_replay_queue(obd);
-	}
-
-	spin_lock(&obd->obd_recovery_task_lock);
-	if (!list_empty(&obd->obd_req_replay_queue)) {
-		req = list_entry(obd->obd_req_replay_queue.next,
-				     struct ptlrpc_request, rq_list);
-		list_del_init(&req->rq_list);
-		obd->obd_requests_queued_for_recovery--;
-		spin_unlock(&obd->obd_recovery_task_lock);
-	} else {
-		spin_unlock(&obd->obd_recovery_task_lock);
-		LASSERT(list_empty(&obd->obd_req_replay_queue));
-		LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0);
-		/** evict exports failed VBR */
-		class_disconnect_stale_exports(obd, exp_vbr_healthy);
-	}
-	RETURN(req);
-}
-
-static struct ptlrpc_request *target_next_replay_lock(struct obd_device *obd)
+static struct ptlrpc_request *target_next_replay_lock(struct lu_target *lut)
 {
+	struct obd_device	*obd = lut->lut_obd;
 	struct ptlrpc_request *req = NULL;
 
 	CDEBUG(D_HA, "Waiting for lock\n");
-	if (target_recovery_overseer(obd, check_for_next_lock,
+	if (target_recovery_overseer(lut, check_for_next_lock,
 				     exp_lock_replay_healthy))
 		abort_lock_replay_queue(obd);
 
@@ -1966,6 +2068,249 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
 	EXIT;
 }
 
+/** Checking routines for recovery */
+static int check_for_recovery_ready(struct lu_target *lut)
+{
+	struct obd_device *obd = lut->lut_obd;
+	unsigned int clnts = atomic_read(&obd->obd_connected_clients);
+
+	CDEBUG(D_HA, "connected %u stale %d max_recoverable_clients %d"
+	       " abort %d expired %d\n", clnts, obd->obd_stale_clients,
+	       obd->obd_max_recoverable_clients, obd->obd_abort_recovery,
+	       obd->obd_recovery_expired);
+
+	if (obd->obd_force_abort_recovery)
+		return 1;
+
+	if (!obd->obd_abort_recovery && !obd->obd_recovery_expired) {
+		LASSERT(clnts <= obd->obd_max_recoverable_clients);
+		if (clnts + obd->obd_stale_clients <
+		    obd->obd_max_recoverable_clients)
+			return 0;
+	}
+
+	if (lut->lut_tdtd != NULL) {
+		if (!lut->lut_tdtd->tdtd_replay_ready) {
+			/* Update replay is not ready yet: extend the recovery
+			 * timer, as it may have expired and evicted clients */
+			extend_recovery_timer(obd, obd->obd_recovery_timeout,
+					      true);
+			return 0;
+		} else {
+			dtrq_list_dump(lut->lut_tdtd, D_HA);
+		}
+	}
+
+	return 1;
+}
+
+enum {
+	REQUEST_RECOVERY = 1,	/* next replay comes from the request queue */
+	UPDATE_RECOVERY = 2,	/* next replay comes from update replay */
+};
+
+/* Peek at the request replay queue: return the transno of its first
+ * request, or 0 if the queue is empty */
+static __u64 get_next_replay_req_transno(struct obd_device *obd)
+{
+	struct ptlrpc_request *first;
+
+	if (list_empty(&obd->obd_req_replay_queue))
+		return 0;
+
+	first = list_entry(obd->obd_req_replay_queue.next,
+			   struct ptlrpc_request, rq_list);
+
+	return lustre_msg_get_transno(first->rq_reqmsg);
+}
+
+/* Return the next transno to replay and, via type, which replay owns it */
+__u64 get_next_transno(struct lu_target *lut, int *type)
+{
+	struct target_distribute_txn_data *tdtd = lut->lut_tdtd;
+	__u64 req_transno, update_transno;
+	ENTRY;
+
+	req_transno = get_next_replay_req_transno(lut->lut_obd);
+	if (type != NULL)
+		*type = REQUEST_RECOVERY;
+
+	if (tdtd == NULL)
+		RETURN(req_transno);
+
+	update_transno = distribute_txn_get_next_transno(tdtd);
+	if (req_transno != 0 &&
+	    (update_transno == 0 || req_transno < update_transno))
+		RETURN(req_transno);
+
+	if (type != NULL)
+		*type = UPDATE_RECOVERY;
+
+	RETURN(update_transno);
+}
+
+/**
+ * Drop a duplicate replay request.
+ *
+ * The operation has already been replayed by update recovery, so the client
+ * request with the same transno is dropped; for MDS_REINT a reply is still
+ * sent to notify the client to send its next replay request.
+ *
+ * \param[in] env	execution environment
+ * \param[in] obd	failover obd device
+ * \param[in] req	request to be dropped
+ */
+static void drop_duplicate_replay_req(struct lu_env *env,
+				      struct obd_device *obd,
+				      struct ptlrpc_request *req)
+{
+	DEBUG_REQ(D_HA, req, "remove t"LPD64" from %s because of duplicate"
+		  " update records are found",
+		  lustre_msg_get_transno(req->rq_reqmsg),
+		  libcfs_nid2str(req->rq_peer.nid));
+
+	/* Right now, update replay and normal request replay can share
+	 * the same transno only for MDS reint operations */
+	if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_REINT) {
+		req_capsule_set(&req->rq_pill, &RQF_MDS_REINT);
+		req->rq_status = req_capsule_server_pack(&req->rq_pill);
+		if (likely(req->rq_export))
+			target_committed_to_req(req);
+		lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
+		target_send_reply(req, req->rq_status, 0);
+	} else {
+		DEBUG_REQ(D_ERROR, req, "wrong opc from %s",
+			  libcfs_nid2str(req->rq_peer.nid));
+	}
+	target_exp_dequeue_req_replay(req);
+	target_request_copy_put(req);
+	obd->obd_replayed_requests++;
+}
+
+static void replay_request_or_update(struct lu_env *env,
+				     struct lu_target *lut,
+				     struct target_recovery_data *trd,
+				     struct ptlrpc_thread *thread)
+{
+	struct obd_device *obd = lut->lut_obd;
+	struct ptlrpc_request *req = NULL;
+	int			type;
+	__u64			transno;
+	ENTRY;
+
+	CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
+	       obd->obd_next_recovery_transno);
+
+	/* Replay all requests and updates in transno order */
+	do {
+		struct target_distribute_txn_data *tdtd = lut->lut_tdtd;
+
+		CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+
+		/** It is needed to extend recovery window above
+		 *  recovery_time_soft. Extending is possible only in the
+		 *  end of recovery window (see more details in
+		 *  handle_recovery_req()).
+		 */
+		CFS_FAIL_TIMEOUT_MS(OBD_FAIL_TGT_REPLAY_DELAY, 300);
+
+		if (target_recovery_overseer(lut, check_for_next_transno,
+					     exp_req_replay_healthy)) {
+			abort_req_replay_queue(obd);
+			abort_lock_replay_queue(obd);
+		}
+
+		spin_lock(&obd->obd_recovery_task_lock);
+		transno = get_next_transno(lut, &type);
+		if (type == REQUEST_RECOVERY && transno != 0) {
+			/* Take the first request off the replay queue. It
+			 * is dropped below if the replay has already been
+			 * executed by update with the same transno */
+			req = list_entry(obd->obd_req_replay_queue.next,
+					struct ptlrpc_request, rq_list);
+
+			list_del_init(&req->rq_list);
+			obd->obd_requests_queued_for_recovery--;
+			spin_unlock(&obd->obd_recovery_task_lock);
+
+			/* Let's check if the request has been redone by
+			 * update replay */
+			if (is_req_replayed_by_update(req)) {
+				struct distribute_txn_replay_req *dtrq;
+
+				dtrq = distribute_txn_lookup_finish_list(tdtd,
+								   req->rq_xid);
+				LASSERT(dtrq != NULL);
+				spin_lock(&tdtd->tdtd_replay_list_lock);
+				list_del_init(&dtrq->dtrq_list);
+				spin_unlock(&tdtd->tdtd_replay_list_lock);
+				dtrq_destroy(dtrq);
+
+				drop_duplicate_replay_req(env, obd, req);
+
+				continue;
+			}
+
+			LASSERT(trd->trd_processing_task == current_pid());
+			DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s",
+				  lustre_msg_get_transno(req->rq_reqmsg),
+				  libcfs_nid2str(req->rq_peer.nid));
+
+			handle_recovery_req(thread, req,
+					    trd->trd_recovery_handler);
+			/**
+			 * bz18031: increase next_recovery_transno before
+			 * target_request_copy_put() will drop exp_rpc reference
+			 */
+			spin_lock(&obd->obd_recovery_task_lock);
+			obd->obd_next_recovery_transno++;
+			spin_unlock(&obd->obd_recovery_task_lock);
+			target_exp_dequeue_req_replay(req);
+			target_request_copy_put(req);
+			obd->obd_replayed_requests++;
+		} else if (type == UPDATE_RECOVERY && transno != 0) {
+			struct distribute_txn_replay_req *dtrq;
+
+			spin_unlock(&obd->obd_recovery_task_lock);
+
+			LASSERT(tdtd != NULL);
+			dtrq = distribute_txn_get_next_req(tdtd);
+			lu_context_enter(&thread->t_env->le_ctx);
+			tdtd->tdtd_replay_handler(env, tdtd, dtrq);
+			lu_context_exit(&thread->t_env->le_ctx);
+			extend_recovery_timer(obd, obd_timeout, true);
+
+			/* Add it to the replay finish list */
+			spin_lock(&tdtd->tdtd_replay_list_lock);
+			if (dtrq->dtrq_xid != 0) {
+				CDEBUG(D_HA, "Move x"LPU64" t"LPU64
+				       " to finish list\n", dtrq->dtrq_xid,
+				       dtrq->dtrq_master_transno);
+				list_add(&dtrq->dtrq_list,
+					 &tdtd->tdtd_replay_finish_list);
+			} else {
+				dtrq_destroy(dtrq);
+			}
+			spin_unlock(&tdtd->tdtd_replay_list_lock);
+
+			spin_lock(&obd->obd_recovery_task_lock);
+			if (transno == obd->obd_next_recovery_transno)
+				obd->obd_next_recovery_transno++;
+			else if (transno > obd->obd_next_recovery_transno)
+				obd->obd_next_recovery_transno = transno + 1;
+			spin_unlock(&obd->obd_recovery_task_lock);
+
+		} else {
+			spin_unlock(&obd->obd_recovery_task_lock);
+			LASSERT(list_empty(&obd->obd_req_replay_queue));
+			LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0);
+			/** evict exports failed VBR */
+			class_disconnect_stale_exports(obd, exp_vbr_healthy);
+			break;
+		}
+	} while (1);
+}
+
 static int target_recovery_thread(void *arg)
 {
         struct lu_target *lut = arg;
@@ -2011,43 +2356,28 @@ static int target_recovery_thread(void *arg)
 	spin_unlock(&obd->obd_dev_lock);
 	complete(&trd->trd_starting);
 
-        /* first of all, we have to know the first transno to replay */
-        if (target_recovery_overseer(obd, check_for_clients,
-                                     exp_connect_healthy)) {
-                abort_req_replay_queue(obd);
-                abort_lock_replay_queue(obd);
-        }
+	/* first of all, we have to know the first transno to replay */
+	if (target_recovery_overseer(lut, check_for_recovery_ready,
+				     exp_connect_healthy)) {
+		abort_req_replay_queue(obd);
+		abort_lock_replay_queue(obd);
+		if (lut->lut_tdtd != NULL)
+			dtrq_list_destroy(lut->lut_tdtd);
+	}
 
-	/* next stage: replay requests */
+	/* next stage: replay requests or update */
 	delta = jiffies;
 	CDEBUG(D_INFO, "1: request replay stage - %d clients from t"LPU64"\n",
 	       atomic_read(&obd->obd_req_replay_clients),
 	       obd->obd_next_recovery_transno);
-	while ((req = target_next_replay_req(obd))) {
-		LASSERT(trd->trd_processing_task == current_pid());
-		DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s",
-			  lustre_msg_get_transno(req->rq_reqmsg),
-			  libcfs_nid2str(req->rq_peer.nid));
-                handle_recovery_req(thread, req,
-                                    trd->trd_recovery_handler);
-                /**
-                 * bz18031: increase next_recovery_transno before
-                 * target_request_copy_put() will drop exp_rpc reference
-                 */
-		spin_lock(&obd->obd_recovery_task_lock);
-		obd->obd_next_recovery_transno++;
-		spin_unlock(&obd->obd_recovery_task_lock);
-                target_exp_dequeue_req_replay(req);
-                target_request_copy_put(req);
-                obd->obd_replayed_requests++;
-        }
+	replay_request_or_update(env, lut, trd, thread);
 
 	/**
 	 * The second stage: replay locks
 	 */
 	CDEBUG(D_INFO, "2: lock replay stage - %d clients\n",
 	       atomic_read(&obd->obd_lock_replay_clients));
-	while ((req = target_next_replay_lock(obd))) {
+	while ((req = target_next_replay_lock(lut))) {
 		LASSERT(trd->trd_processing_task == current_pid());
 		DEBUG_REQ(D_HA, req, "processing lock from %s: ",
 			  libcfs_nid2str(req->rq_peer.nid));
@@ -2094,7 +2424,7 @@ static int target_recovery_thread(void *arg)
 		libcfs_debug_dumplog();
 	}
 
-        target_finish_recovery(obd);
+	target_finish_recovery(lut);
 
         lu_context_fini(&env->le_ctx);
         trd->trd_processing_task = 0;
@@ -2170,6 +2500,7 @@ static void target_recovery_expired(unsigned long castmeharder)
 void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
 {
         struct obd_device *obd = lut->lut_obd;
+
         if (obd->obd_max_recoverable_clients == 0) {
                 /** Update server last boot epoch */
                 tgt_boot_epoch_update(lut);
@@ -2189,7 +2520,6 @@ void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
 }
 EXPORT_SYMBOL(target_recovery_init);
 
-
 static int target_process_req_flags(struct obd_device *obd,
                                     struct ptlrpc_request *req)
 {
@@ -2300,8 +2630,13 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
         CDEBUG(D_HA, "Next recovery transno: "LPU64
                ", current: "LPU64", replaying\n",
                obd->obd_next_recovery_transno, transno);
+
+	/* If the request has already been replayed by update replay, it is
+	 * still sent to the recovery thread (replay_request_or_update()),
+	 * where it will be handled */
 	spin_lock(&obd->obd_recovery_task_lock);
-	if (transno < obd->obd_next_recovery_transno) {
+	if (transno < obd->obd_next_recovery_transno &&
+	    !is_req_replayed_by_update(req)) {
 		/* Processing the queue right now, don't re-add. */
 		LASSERT(list_empty(&req->rq_list));
 		spin_unlock(&obd->obd_recovery_task_lock);
@@ -2410,20 +2745,23 @@ int target_pack_pool_reply(struct ptlrpc_request *req)
 static int target_send_reply_msg(struct ptlrpc_request *req,
 				 int rc, int fail_id)
 {
-        if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
-                DEBUG_REQ(D_ERROR, req, "dropping reply");
-                return (-ECOMM);
-        }
+	if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
+		DEBUG_REQ(D_ERROR, req, "dropping reply");
+		return -ECOMM;
+	}
+	if (unlikely(lustre_msg_get_opc(req->rq_reqmsg) == MDS_REINT &&
+		     OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_MULTI_NET_REP)))
+		return -ECOMM;
 
-        if (unlikely(rc)) {
-                DEBUG_REQ(D_NET, req, "processing error (%d)", rc);
-                req->rq_status = rc;
-                return (ptlrpc_send_error(req, 1));
-        } else {
-                DEBUG_REQ(D_NET, req, "sending reply");
-        }
+	if (unlikely(rc)) {
+		DEBUG_REQ(D_NET, req, "processing error (%d)", rc);
+		req->rq_status = rc;
+		return ptlrpc_send_error(req, 1);
+	} else {
+		DEBUG_REQ(D_NET, req, "sending reply");
+	}
 
-        return (ptlrpc_send_reply(req, PTLRPC_REPLY_MAYBE_DIFFICULT));
+	return ptlrpc_send_reply(req, PTLRPC_REPLY_MAYBE_DIFFICULT);
 }
 
 void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
@@ -2518,7 +2856,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
 	EXIT;
 }
 
-ldlm_mode_t lck_compat_array[] = {
+enum ldlm_mode lck_compat_array[] = {
 	[LCK_EX]    = LCK_COMPAT_EX,
 	[LCK_PW]    = LCK_COMPAT_PW,
 	[LCK_PR]    = LCK_COMPAT_PR,
@@ -2533,12 +2871,12 @@ ldlm_mode_t lck_compat_array[] = {
  * Rather arbitrary mapping from LDLM error codes to errno values. This should
  * not escape to the user level.
  */
-int ldlm_error2errno(ldlm_error_t error)
+int ldlm_error2errno(enum ldlm_error error)
 {
-        int result;
+	int result;
 
-        switch (error) {
-        case ELDLM_OK:
+	switch (error) {
+	case ELDLM_OK:
 	case ELDLM_LOCK_MATCHED:
                 result = 0;
                 break;
@@ -2560,22 +2898,22 @@ int ldlm_error2errno(ldlm_error_t error)
         case ELDLM_BAD_NAMESPACE:
                 result = -EBADF;
                 break;
-        default:
-                if (((int)error) < 0)  /* cast to signed type */
-                        result = error; /* as ldlm_error_t can be unsigned */
-                else {
-                        CERROR("Invalid DLM result code: %d\n", error);
-                        result = -EPROTO;
-                }
-        }
-        return result;
+	default:
+		if (((int)error) < 0) { /* cast to signed type */
+			result = error; /* as ldlm_error can be unsigned */
+		} else {
+			CERROR("Invalid DLM result code: %d\n", error);
+			result = -EPROTO;
+		}
+	}
+	return result;
 }
 EXPORT_SYMBOL(ldlm_error2errno);
 
 /**
- * Dual to ldlm_error2errno(): maps errno values back to ldlm_error_t.
+ * Dual to ldlm_error2errno(): maps errno values back to enum ldlm_error.
  */
-ldlm_error_t ldlm_errno2error(int err_no)
+enum ldlm_error ldlm_errno2error(int err_no)
 {
         int error;
 
@@ -2669,6 +3007,13 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
 	} else {
 		if (req->rq_bulk_read)
 			rc = sptlrpc_svc_wrap_bulk(req, desc);
+
+		if ((exp->exp_connect_data.ocd_connect_flags &
+		     OBD_CONNECT_BULK_MBITS) != 0)
+			req->rq_mbits = lustre_msg_get_mbits(req->rq_reqmsg);
+		else /* old version, bulk matchbits is rq_xid */
+			req->rq_mbits = req->rq_xid;
+
 		if (rc == 0)
 			rc = ptlrpc_start_bulk_transfer(desc);
 	}