- Drop requests with wrong flags during normal processing.
- Remove a race in target_handle_connect which can cause replays over
a new export
- Add a test case for handling of invalid replays
Change-Id: Icbfc0ae549ab63a510e0d62fcac1ed661b324fb9
Signed-off-by: Mikhail Pershin <tappro@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/391
Tested-by: Hudson
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Niu Yawei <niu@whamcloud.com>
#define OBD_FAIL_TGT_REPLAY_DELAY 0x709
#define OBD_FAIL_TGT_LAST_REPLAY 0x710
#define OBD_FAIL_TGT_CLIENT_ADD 0x711
+#define OBD_FAIL_TGT_RCVG_FLAG 0x712
#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801
export, (long)cfs_time_current_sec(),
export ? (long)export->exp_last_request_time : 0);
- /* Tell the client if we're in recovery. */
- if (target->obd_recovering) {
- lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
- /* If this is the first time a client connects,
- reset the recovery timer */
- if (rc == 0)
- target_start_and_reset_recovery_timer(target, req,
- !export);
- }
+ /* If this is the first time a client connects,
+ * reset the recovery timer */
+ if (rc == 0 && target->obd_recovering)
+ target_start_and_reset_recovery_timer(target, req, !export);
/* We want to handle EALREADY but *not* -EALREADY from
* target_handle_reconnect(), return reconnection state in a flag */
cfs_waitq_signal(&target->obd_next_transno_waitq);
}
cfs_spin_unlock(&target->obd_recovery_task_lock);
+
+ /* Tell the client we're in recovery, when client is involved in it. */
+ if (target->obd_recovering)
+ lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
+
tmp = req_capsule_client_get(&req->rq_pill, &RMF_CONN);
conn = *tmp;
if (rc)
GOTO(out, rc);
+ if (OBD_FAIL_CHECK(OBD_FAIL_TGT_RCVG_FLAG))
+ lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
+
rc = mdt_connect_internal(lexp, mdt, data);
if (rc == 0) {
struct mdt_thread_info *mti;
imp->imp_vbr_failed = 1;
imp->imp_no_lock_replay = 1;
cfs_spin_unlock(&imp->imp_lock);
+ lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
} else {
/** The transno had better not change over replay. */
LASSERTF(lustre_msg_get_transno(req->rq_reqmsg) ==
cfs_spin_unlock(&imp->imp_lock);
LASSERT(imp->imp_last_replay_transno);
+ /* transaction number shouldn't be bigger than the latest replayed */
+ if (req->rq_transno > lustre_msg_get_transno(req->rq_reqmsg)) {
+ DEBUG_REQ(D_ERROR, req,
+ "Reported transno "LPU64" is bigger than the "
+ "replayed one: "LPU64, req->rq_transno,
+ lustre_msg_get_transno(req->rq_reqmsg));
+ GOTO(out, rc = -EINVAL);
+ }
+
DEBUG_REQ(D_HA, req, "got rep");
/* let the callback do fixups, possibly including in the request */
* Errors while replay can set transno to 0, but
* imp_last_replay_transno shouldn't be set to 0 anyway
*/
- if (req->rq_transno > 0) {
- cfs_spin_lock(&imp->imp_lock);
- LASSERT(req->rq_transno <= imp->imp_last_replay_transno);
- imp->imp_last_replay_transno = req->rq_transno;
- cfs_spin_unlock(&imp->imp_lock);
- } else
+ if (req->rq_transno == 0)
CERROR("Transno is 0 during replay!\n");
+
/* continue with recovery */
rc = ptlrpc_import_recovery_state_machine(imp);
out:
*/
static int ptlrpc_check_req(struct ptlrpc_request *req)
{
+ int rc = 0;
+
if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) <
req->rq_export->exp_conn_cnt)) {
DEBUG_REQ(D_ERROR, req,
error response instead. */
CDEBUG(D_RPCTRACE, "Dropping req %p for failed obd %s\n",
req, req->rq_export->exp_obd->obd_name);
- req->rq_status = -ENODEV;
+ rc = -ENODEV;
+ } else if (lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_REPLAY | MSG_REQ_REPLAY_DONE) &&
+ !(req->rq_export->exp_obd->obd_recovering)) {
+ DEBUG_REQ(D_ERROR, req,
+ "Invalid replay without recovery");
+ class_fail_export(req->rq_export);
+ rc = -ENODEV;
+ } else if (lustre_msg_get_transno(req->rq_reqmsg) != 0 &&
+ !(req->rq_export->exp_obd->obd_recovering)) {
+ DEBUG_REQ(D_ERROR, req, "Invalid req with transno "
+ LPU64" without recovery",
+ lustre_msg_get_transno(req->rq_reqmsg));
+ class_fail_export(req->rq_export);
+ rc = -ENODEV;
+ }
+
+ if (unlikely(rc < 0)) {
+ req->rq_status = rc;
ptlrpc_error(req);
- return -ENODEV;
}
-
- return 0;
+ return rc;
}
static void ptlrpc_at_set_timer(struct ptlrpc_service *svc)
}
run_test 44b "race in target handle connect"
+test_44c() {
+ replay_barrier $SINGLEMDS
+ createmany -m $DIR/$tfile-%d 100
+#define OBD_FAIL_TGT_RCVG_FLAG 0x712
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000712"
+ fail_abort $SINGLEMDS
+ unlinkmany $DIR/$tfile-%d 100 && return 1
+ fail $SINGLEMDS
+ unlinkmany $DIR/$tfile-%d 100 && return 1
+ return 0
+}
+run_test 44c "race in target handle connect"
+
# Handle failed close
test_45() {
mdcdev=`lctl get_param -n devices | awk '/MDT0000-mdc-/ {print $1}'`