- spin_lock(&imp->imp_lock);
- imp->imp_last_transno_checked = 0;
- ptlrpc_free_committed(imp);
- last_transno = imp->imp_last_replay_transno;
- spin_unlock(&imp->imp_lock);
-
- CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
- imp, obd2cli_tgt(imp->imp_obd),
- imp->imp_peer_committed_transno, last_transno);
-
- /* Do I need to hold a lock across this iteration? We shouldn't be
- * racing with any additions to the list, because we're in recovery
- * and are therefore not processing additional requests to add. Calls
- * to ptlrpc_free_committed might commit requests, but nothing "newer"
- * than the one we're replaying (it can't be committed until it's
- * replayed, and we're doing that here). l_f_e_safe protects against
- * problems with the current request being committed, in the unlikely
- * event of that race. So, in conclusion, I think that it's safe to
- * perform this list-walk without the imp_lock held.
- *
- * But, the {mdc,osc}_replay_open callbacks both iterate
- * request lists, and have comments saying they assume the
- * imp_lock is being held by ptlrpc_replay, but it's not. it's
- * just a little race...
- */
- list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
- req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
-
- /* If need to resend the last sent transno (because a
- reconnect has occurred), then stop on the matching
- req and send it again. If, however, the last sent
- transno has been committed then we continue replay
- from the next request. */
- if (imp->imp_resend_replay &&
- req->rq_transno == last_transno) {
- lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
- break;
- }
-
- if (req->rq_transno > last_transno) {
- imp->imp_last_replay_transno = req->rq_transno;
- break;
- }
-
- req = NULL;
- }
-
- imp->imp_resend_replay = 0;
-
- if (req != NULL) {
- rc = ptlrpc_replay_req(req);
- if (rc) {
- CERROR("recovery replay error %d for req "
- LPD64"\n", rc, req->rq_xid);
- RETURN(rc);
- }
- *inflight = 1;
- }
- RETURN(rc);
+ spin_lock(&imp->imp_lock);
+ imp->imp_last_transno_checked = 0;
+ ptlrpc_free_committed(imp);
+ last_transno = imp->imp_last_replay_transno;
+
+ CDEBUG(D_HA, "import %p from %s committed %llu last %llu\n",
+ imp, obd2cli_tgt(imp->imp_obd),
+ imp->imp_peer_committed_transno, last_transno);
+
+ /* Replay all the committed open requests on committed_list first */
+ if (!list_empty(&imp->imp_committed_list)) {
+ tmp = imp->imp_committed_list.prev;
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ /* The last request on committed_list hasn't been replayed */
+ if (req->rq_transno > last_transno) {
+ if (!imp->imp_resend_replay ||
+ imp->imp_replay_cursor == &imp->imp_committed_list)
+ imp->imp_replay_cursor =
+ imp->imp_replay_cursor->next;
+
+ while (imp->imp_replay_cursor !=
+ &imp->imp_committed_list) {
+ req = list_entry(imp->imp_replay_cursor,
+ struct ptlrpc_request,
+ rq_replay_list);
+ if (req->rq_transno > last_transno)
+ break;
+
+ req = NULL;
+ LASSERT(!list_empty(imp->imp_replay_cursor));
+ imp->imp_replay_cursor =
+ imp->imp_replay_cursor->next;
+ }
+ } else {
+ /* All requests on committed_list have been replayed */
+ imp->imp_replay_cursor = &imp->imp_committed_list;
+ req = NULL;
+ }
+ }
+
+ /* All the requests in committed list have been replayed, let's replay
+ * the imp_replay_list */
+ if (req == NULL) {
+ list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ if (req->rq_transno > last_transno)
+ break;
+ req = NULL;
+ }
+ }
+
+ /* If we need to resend the last sent transno (because a reconnect
+ * has occurred), then stop on the matching req and send it again.
+ * If, however, the last sent transno has been committed then we
+ * continue replay from the next request. */
+ if (req != NULL && imp->imp_resend_replay)
+ lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
+
+ /* ptlrpc_prepare_replay() may fail to add the request to the unreplied
+ * list if the request had not yet been added to the replay list at that
+ * time. Another exception is that a resent replay could have been removed
+ * from the unreplied list. */
+ if (req != NULL && list_empty(&req->rq_unreplied_list)) {
+ DEBUG_REQ(D_HA, req, "resend_replay=%d, last_transno=%llu",
+ imp->imp_resend_replay, last_transno);
+ ptlrpc_add_unreplied(req);
+ imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp);
+ }
+
+ imp->imp_resend_replay = 0;
+ spin_unlock(&imp->imp_lock);
+
+ if (req != NULL) {
+ LASSERT(!list_empty(&req->rq_unreplied_list));
+
+ rc = ptlrpc_replay_req(req);
+ if (rc) {
+ CERROR("recovery replay error %d for req "
+ "%llu\n", rc, req->rq_xid);
+ RETURN(rc);
+ }
+ *inflight = 1;
+ }
+ RETURN(rc);