X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftarget%2Ftgt_handler.c;h=24e6936e9472b012a0b2d4febeb57a4a2d6cca71;hp=4a5287510ef4030c83b0854e3e993d9098dd2b89;hb=c1d465de13ccf0eda8020c88661c3cc4d78538ca;hpb=a0c644fde3405bba6752885481f0fdfe05da1bcd diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index 4a52875..24e6936 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -661,6 +661,19 @@ static int process_req_last_xid(struct ptlrpc_request *req) RETURN(-EPROTO); } + /* The "last_xid" is the minimum xid among unreplied requests, + * if the request is from the previous connection, its xid can + * still be larger than "exp_last_xid", then the above check of + * xid is not enough to determine whether the request is delayed. + * + * For example, if some replay request was delayed and caused + * timeout at client and the replay is restarted, the delayed + * replay request will have the larger xid than "exp_last_xid" + */ + if (req->rq_export->exp_conn_cnt > + lustre_msg_get_conn_cnt(req->rq_reqmsg)) + RETURN(-ESTALE); + /* try to release in-memory reply data */ if (tgt_is_multimodrpcs_client(req->rq_export)) { tgt_handle_received_xid(req->rq_export, @@ -687,6 +700,19 @@ int tgt_request_handle(struct ptlrpc_request *req) bool is_connect = false; ENTRY; + if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_RECOVERY_REQ_RACE))) { + if (cfs_fail_val == 0 && + lustre_msg_get_opc(msg) != OBD_PING && + lustre_msg_get_flags(msg) & MSG_REQ_REPLAY_DONE) { + struct l_wait_info lwi = { 0 }; + + cfs_fail_val = 1; + cfs_race_state = 0; + l_wait_event(cfs_race_waitq, (cfs_race_state == 1), + &lwi); + } + } + /* Refill the context, to make sure all thread keys are allocated */ lu_env_refill(req->rq_svc_thread->t_env);