if (rc && rc != EALREADY)
GOTO(out, rc);
- /* XXX track this all the time? */
- if (target->obd_recovering) {
- target->obd_connected_clients++;
- }
-
req->rq_repmsg->handle = conn;
/* If the client and the server are the same node, we will already
GOTO(out, rc = 0);
}
+ if (target->obd_recovering) {
+ target->obd_connected_clients++;
+ }
+
memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof conn),
sizeof conn);
queue_len = obd->obd_requests_queued_for_recovery;
next_transno = obd->obd_next_recovery_transno;
+ CDEBUG(D_HA,"max: %d, connected: %d, completed: %d, queue_len: %d, "
+ "req_transno: "LPU64", next_transno: "LPU64"\n",
+ max, connected, completed, queue_len, req_transno, next_transno);
if (obd->obd_abort_recovery) {
CDEBUG(D_HA, "waking for aborted recovery\n");
wake_up = 1;
* Also, if this request has a transno less than the one we're waiting
* for, we should process it now. It could (and currently always will)
* be an open request for a descriptor that was opened some time ago.
+ *
+ * Also, a resent, replayed request that has already been
+ * handled will pass through here and be processed immediately.
*/
if (obd->obd_processing_task == current->pid ||
transno < obd->obd_next_recovery_transno) {
return 1;
}
+ /* A resent, replayed request that is still on the queue; just drop it.
+    The copy of this request already on the recovery queue will be
+    processed instead. */
+ if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT | MSG_REPLAY)) ==
+ (MSG_RESENT | MSG_REPLAY)) {
+ DEBUG_REQ(D_ERROR, req, "dropping resent queued req");
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ OBD_FREE(reqmsg, req->rq_reqlen);
+ OBD_FREE(saved_req, sizeof *saved_req);
+ return 0;
+ }
+
memcpy(saved_req, req, sizeof *req);
memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
req = saved_req;
LASSERT_SPIN_LOCKED(&imp->imp_lock);
+ /* Clear the RESENT flag both for new requests that were resent
+    and for resent replayed requests. */
+ lustre_msg_clear_flags(req->rq_reqmsg,
+ MSG_RESENT);
+
/* don't re-add requests that have been replayed */
if (!list_empty(&req->rq_replay_list))
return;
aa->praa_old_state = req->rq_send_state;
req->rq_send_state = LUSTRE_IMP_REPLAY;
req->rq_phase = RQ_PHASE_NEW;
- /*
- * Q: "How can a req get on the replay list if it wasn't replied?"
- * A: "If we failed during the replay of this request, it will still
- * be on the list, but rq_replied will have been reset to 0."
- */
- if (req->rq_replied) {
- aa->praa_old_status = req->rq_repmsg->status;
- req->rq_status = 0;
- req->rq_replied = 0;
- }
+ aa->praa_old_status = req->rq_repmsg->status;
+ req->rq_status = 0;
req->rq_interpret_reply = ptlrpc_replay_interpret;
atomic_inc(&req->rq_import->imp_replay_inflight);
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
imp->imp_conn_cnt++;
- imp->imp_last_replay_transno = 0;
+ imp->imp_resend_replay = 0;
if (imp->imp_remote_handle.cookie == 0) {
initial_connect = 1;
imp->imp_connection->c_remote_uuid.uuid);
}
- if (imp->imp_invalid)
+ if (imp->imp_invalid) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
- else
+ } else if (MSG_CONNECT_RECOVERING & msg_flags) {
+ imp->imp_resend_replay = 1;
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
+ } else {
IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+ }
} else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) {
LASSERT(imp->imp_replayable);
imp->imp_remote_handle = request->rq_repmsg->handle;
+ imp->imp_last_replay_transno = 0;
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
} else {
imp->imp_remote_handle = request->rq_repmsg->handle;
{
int rc = 0;
struct list_head *tmp, *pos;
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req = NULL;
unsigned long flags;
__u64 last_transno;
ENTRY;
*/
list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+
+ /* If we need to resend, stop at the matching request first.
+    It may, however, already have been committed; in that case
+    we'll just continue with the replay. */
+ if (imp->imp_resend_replay &&
+ req->rq_transno == last_transno) {
+ lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
+ break;
+ }
+
if (req->rq_transno > last_transno) {
- rc = ptlrpc_replay_req(req);
- if (rc) {
- CERROR("recovery replay error %d for req "
- LPD64"\n", rc, req->rq_xid);
- RETURN(rc);
- }
- *inflight = 1;
+ imp->imp_last_replay_transno = req->rq_transno;
break;
}
+
+ req = NULL;
+ }
+
+ imp->imp_resend_replay = 0;
+
+ if (req != NULL) {
+ rc = ptlrpc_replay_req(req);
+ if (rc) {
+ CERROR("recovery replay error %d for req "
+ LPD64"\n", rc, req->rq_xid);
+ RETURN(rc);
+ }
+ *inflight = 1;
}
RETURN(rc);
}
fail mds
fi
- umount $MOUNT2
- umount $MOUNT
+ umount $MOUNT2 || true
+ umount $MOUNT || true
rmmod llite
stop mds ${FORCE}
stop ost2 ${FORCE}
run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2"
test_7() {
+ replay_barrier mds
+ createmany -o $MOUNT1/$tfile- 25
+ createmany -o $MOUNT2/$tfile-2- 1
+ createmany -o $MOUNT1/$tfile-3- 25
+ umount $MOUNT2
+
+ facet_failover mds
+ # expect failover to fail
+ df $MOUNT && return 1
+
+# 3313 - current fix for 3313 prevents any reply here
+# unlinkmany $MOUNT1/$tfile- 25 || return 2
+
+ zconf_mount `hostname` $MOUNT2
+ return 0
+}
+run_test 7 "timeouts waiting for lost client during replay"
+
+
+test_8() {
+ replay_barrier mds
+ drop_reint_reply "mcreate $MOUNT1/$tfile" || return 1
+ fail mds
+ checkstat $MOUNT2/$tfile || return 2
+ rm $MOUNT1/$tfile || return 3
+
+ return 0
+}
+run_test 8 "replay of resent request"
+
+test_9() {
+ replay_barrier mds
+ mcreate $MOUNT1/$tfile-1
+ mcreate $MOUNT2/$tfile-2
+ # drop first reint reply
+ sysctl -w lustre.fail_loc=0x80000119
+ fail mds
+ sysctl -w lustre.fail_loc=0
+
+ rm $MOUNT1/$tfile-[1,2] || return 1
+
+ return 0
+}
+run_test 9 "resending a replayed create"
+
+test_10() {
+ mcreate $MOUNT1/$tfile-1
+ replay_barrier mds
+ munlink $MOUNT1/$tfile-1
+ mcreate $MOUNT2/$tfile-2
+ # drop first reint reply
+ sysctl -w lustre.fail_loc=0x80000119
+ fail mds
+ sysctl -w lustre.fail_loc=0
+
+ checkstat $MOUNT1/$tfile-1 && return 1
+ checkstat $MOUNT1/$tfile-2 || return 2
+ rm $MOUNT1/$tfile-2
+
+ return 0
+}
+run_test 10 "resending a replayed unlink"
+
+test_11() {
+ replay_barrier mds
+ mcreate $MOUNT1/$tfile-1
+ mcreate $MOUNT2/$tfile-2
+ mcreate $MOUNT1/$tfile-3
+ mcreate $MOUNT2/$tfile-4
+ mcreate $MOUNT1/$tfile-5
+ # drop all reint replies for a while
+ sysctl -w lustre.fail_loc=0x0119
+ facet_failover mds
+ #sleep for while, let both clients reconnect and timeout
+ sleep $((TIMEOUT * 2))
+ sysctl -w lustre.fail_loc=0
+
+ rm $MOUNT1/$tfile-[1-5] || return 1
+
+ return 0
+}
+run_test 11 "both clients timeout during replay"
+
+test_12() {
+ replay_barrier mds
+
+ multiop $DIR/$tfile mo_c &
+ MULTIPID=$!
+ sleep 5
+
+ # drop first enqueue
+ sysctl -w lustre.fail_loc=0x80000302
+ facet_failover mds
+ df $MOUNT || return 1
+ sysctl -w lustre.fail_loc=0
+
+ ls $DIR/$tfile
+ $CHECKSTAT -t file $DIR/$tfile || return 2
+ kill -USR1 $MULTIPID || return 3
+ wait $MULTIPID || return 4
+ rm $DIR/$tfile
+
+ return 0
+}
+run_test 12 "open resend timeout"
+
+test_13() {
+ multiop $DIR/$tfile mo_c &
+ MULTIPID=$!
+ sleep 5
+
+ replay_barrier mds
+
+ kill -USR1 $MULTIPID || return 3
+ wait $MULTIPID || return 4
+
+ # drop close
+ sysctl -w lustre.fail_loc=0x80000115
+ facet_failover mds
+ df $MOUNT || return 1
+ sysctl -w lustre.fail_loc=0
+
+ ls $DIR/$tfile
+ $CHECKSTAT -t file $DIR/$tfile || return 2
+ rm $DIR/$tfile
+
+ return 0
+}
+run_test 13 "close resend timeout"
+
+test_7() {
mcreate $MOUNT1/a
multiop $MOUNT2/a o_c &
pid1=$!