- req = target_next_replay_req(obd);
- if (req != NULL) {
- char peer_str[PTL_NALFMT_SIZE];
- DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s: ",
- req->rq_reqmsg->transno,
- ptlrpc_peernid2str(&req->rq_peer, peer_str));
- (void)trd->trd_recovery_handler(req);
- obd->obd_replayed_requests++;
- reset_recovery_timer(obd);
- /* bug 1580: decide how to properly sync() in recovery*/
- //mds_fsync_super(mds->mds_sb);
- ptlrpc_free_clone(req);
- spin_lock_bh(&obd->obd_processing_task_lock);
- obd->obd_next_recovery_transno++;
- spin_unlock_bh(&obd->obd_processing_task_lock);
- } else {
- /* recovery is over */
- spin_lock_bh(&obd->obd_processing_task_lock);
- obd->obd_recovering = 0;
- target_cancel_recovery_timer(obd);
- if (obd->obd_abort_recovery) {
- obd->obd_abort_recovery = 0;
- spin_unlock_bh(&obd->obd_processing_task_lock);
- target_abort_recovery(obd);
- } else {
- LASSERT(obd->obd_recoverable_clients == 0);
- spin_unlock_bh(&obd->obd_processing_task_lock);
- target_finish_recovery(obd);
- }
- }
+ DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s: ",
+ req->rq_reqmsg->transno,
+ ptlrpc_peernid2str(&req->rq_peer, peer_str));
+ (void)trd->trd_recovery_handler(req);
+ obd->obd_replayed_requests++;
+ reset_recovery_timer(obd);
+ /* bug 1580: decide how to properly sync() in recovery*/
+ //mds_fsync_super(mds->mds_sb);
+ ptlrpc_free_clone(req);
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ obd->obd_next_recovery_transno++;
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ }
+
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ target_cancel_recovery_timer(obd);
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+
+ /* If some clients haven't replayed requests in time, evict them */
+ if (obd->obd_abort_recovery) {
+ int stale;
+ CERROR("req replay timed out, aborting ...\n");
+ obd->obd_abort_recovery = 0;
+ stale = class_disconnect_stale_exports(obd, req_replay_done, 0);
+ atomic_sub(stale, &obd->obd_lock_replay_clients);
+ abort_req_replay_queue(obd);
+ }
+
+ /* The second stage: replay locks */
+ CWARN("2: lock replay stage - %d clients\n",
+ atomic_read(&obd->obd_lock_replay_clients));
+ while ((req = target_next_replay_lock(obd))) {
+ LASSERT(trd->trd_processing_task == current->pid);
+ DEBUG_REQ(D_HA, req, "processing lock from %s: ",
+ ptlrpc_peernid2str(&req->rq_peer, peer_str));
+ (void)trd->trd_recovery_handler(req);
+ reset_recovery_timer(obd);
+ ptlrpc_free_clone(req);
+ obd->obd_replayed_locks++;
+ }
+
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ target_cancel_recovery_timer(obd);
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+
+ /* If some clients haven't replayed their locks in time, evict them */
+ if (obd->obd_abort_recovery) {
+ int stale;
+ CERROR("lock replay timed out, aborting ...\n");
+ obd->obd_abort_recovery = 0;
+ stale = class_disconnect_stale_exports(obd, lock_replay_done, 0);
+ abort_lock_replay_queue(obd);
+ }
+
+ /* Drop the recovering flag so that, from now on, all new requests
+ * are forwarded to the regular mds_handle() */
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ obd->obd_recovering = 0;
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+
+ /* The third stage: reply on final pings */
+ CWARN("3: final stage - process recovery completion pings\n");
+ while ((req = target_next_final_ping(obd))) {
+ LASSERT(trd->trd_processing_task == current->pid);
+ DEBUG_REQ(D_HA, req, "processing final ping from %s: ",
+ ptlrpc_peernid2str(&req->rq_peer, peer_str));
+ (void)trd->trd_recovery_handler(req);
+ ptlrpc_free_clone(req);