From: uid723 Date: Wed, 16 Jun 2004 07:33:21 +0000 (+0000) Subject: b=1742 X-Git-Tag: 1.3.4~736 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=da823769e09aa247eb0b7533a66639f3cab9d9e7;p=fs%2Flustre-release.git b=1742 - landed on HEAD --- diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 11431c9..b6ae61c 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -499,11 +499,6 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (rc && rc != EALREADY) GOTO(out, rc); - /* XXX track this all the time? */ - if (target->obd_recovering) { - target->obd_connected_clients++; - } - req->rq_repmsg->handle = conn; /* If the client and the server are the same node, we will already @@ -547,6 +542,10 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) GOTO(out, rc = 0); } + if (target->obd_recovering) { + target->obd_connected_clients++; + } + memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof conn), sizeof conn); @@ -742,6 +741,9 @@ static int check_for_next_transno(struct obd_device *obd) queue_len = obd->obd_requests_queued_for_recovery; next_transno = obd->obd_next_recovery_transno; + CDEBUG(D_HA,"max: %d, connected: %d, completed: %d, queue_len: %d, " + "req_transno: "LPU64", next_transno: "LPU64"\n", + max, connected, completed, queue_len, req_transno, next_transno); if (obd->obd_abort_recovery) { CDEBUG(D_HA, "waking for aborted recovery\n"); wake_up = 1; @@ -855,6 +857,9 @@ int target_queue_recovery_request(struct ptlrpc_request *req, * Also, if this request has a transno less than the one we're waiting * for, we should process it now. It could (and currently always will) * be an open request for a descriptor that was opened some time ago. + * + * Also, a resent, replayed request that has already been + * handled will pass through here and be processed immediately. 
*/ if (obd->obd_processing_task == current->pid || transno < obd->obd_next_recovery_transno) { @@ -866,6 +871,17 @@ int target_queue_recovery_request(struct ptlrpc_request *req, return 1; } + /* A resent, replayed request that is still on the queue; just drop it. + The queued request will handle this. */ + if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT | MSG_REPLAY)) == + (MSG_RESENT | MSG_REPLAY)) { + DEBUG_REQ(D_ERROR, req, "dropping resent queued req"); + spin_unlock_bh(&obd->obd_processing_task_lock); + OBD_FREE(reqmsg, req->rq_reqlen); + OBD_FREE(saved_req, sizeof *saved_req); + return 0; + } + memcpy(saved_req, req, sizeof *req); memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); req = saved_req; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 5481a8f..452f3ed 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1263,6 +1263,11 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, LASSERT_SPIN_LOCKED(&imp->imp_lock); + /* clear this for new requests that were resent as well + as resent replayed requests. */ + lustre_msg_clear_flags(req->rq_reqmsg, + MSG_RESENT); + /* don't re-add requests that have been replayed */ if (!list_empty(&req->rq_replay_list)) return; @@ -1581,16 +1586,8 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) aa->praa_old_state = req->rq_send_state; req->rq_send_state = LUSTRE_IMP_REPLAY; req->rq_phase = RQ_PHASE_NEW; - /* - * Q: "How can a req get on the replay list if it wasn't replied?" - * A: "If we failed during the replay of this request, it will still - * be on the list, but rq_replied will have been reset to 0." 
- */ - if (req->rq_replied) { - aa->praa_old_status = req->rq_repmsg->status; - req->rq_status = 0; - req->rq_replied = 0; - } + aa->praa_old_status = req->rq_repmsg->status; + req->rq_status = 0; req->rq_interpret_reply = ptlrpc_replay_interpret; atomic_inc(&req->rq_import->imp_replay_inflight); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 045896f..e2106b4 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -256,7 +256,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING); imp->imp_conn_cnt++; - imp->imp_last_replay_transno = 0; + imp->imp_resend_replay = 0; if (imp->imp_remote_handle.cookie == 0) { initial_connect = 1; @@ -398,13 +398,18 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, imp->imp_connection->c_remote_uuid.uuid); } - if (imp->imp_invalid) + if (imp->imp_invalid) { IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); - else + } else if (MSG_CONNECT_RECOVERING & msg_flags) { + imp->imp_resend_replay = 1; + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY); + } else { IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + } } else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) { LASSERT(imp->imp_replayable); imp->imp_remote_handle = request->rq_repmsg->handle; + imp->imp_last_replay_transno = 0; IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY); } else { imp->imp_remote_handle = request->rq_repmsg->handle; diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index a719b43..6d4af7a 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -151,7 +151,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) { int rc = 0; struct list_head *tmp, *pos; - struct ptlrpc_request *req; + struct ptlrpc_request *req = NULL; unsigned long flags; __u64 last_transno; ENTRY; @@ -187,16 +187,34 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) */ list_for_each_safe(tmp, pos, &imp->imp_replay_list) { req = 
list_entry(tmp, struct ptlrpc_request, rq_replay_list); + + /* If need to resend, stop on the matching one first. It's + possible though it's already been committed, so in that case + we'll just continue with replay */ + if (imp->imp_resend_replay && + req->rq_transno == last_transno) { + lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); + break; + } + if (req->rq_transno > last_transno) { - rc = ptlrpc_replay_req(req); - if (rc) { - CERROR("recovery replay error %d for req " - LPD64"\n", rc, req->rq_xid); - RETURN(rc); - } - *inflight = 1; + imp->imp_last_replay_transno = req->rq_transno; break; } + + req = NULL; + } + + imp->imp_resend_replay = 0; + + if (req != NULL) { + rc = ptlrpc_replay_req(req); + if (rc) { + CERROR("recovery replay error %d for req " + LPD64"\n", rc, req->rq_xid); + RETURN(rc); + } + *inflight = 1; } RETURN(rc); } diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index c91837b..d702b59 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -35,8 +35,8 @@ cleanup() { fail mds fi - umount $MOUNT2 - umount $MOUNT + umount $MOUNT2 || true + umount $MOUNT || true rmmod llite stop mds ${FORCE} stop ost2 ${FORCE} @@ -176,6 +176,137 @@ test_6() { run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2" test_7() { + replay_barrier mds + createmany -o $MOUNT1/$tfile- 25 + createmany -o $MOUNT2/$tfile-2- 1 + createmany -o $MOUNT1/$tfile-3- 25 + umount $MOUNT2 + + facet_failover mds + # expect failover to fail + df $MOUNT && return 1 + +# 3313 - current fix for 3313 prevents any reply here +# unlinkmany $MOUNT1/$tfile- 25 || return 2 + + zconf_mount `hostname` $MOUNT2 + return 0 +} +run_test 7 "timeouts waiting for lost client during replay" + + +test_8() { + replay_barrier mds + drop_reint_reply "mcreate $MOUNT1/$tfile" || return 1 + fail mds + checkstat $MOUNT2/$tfile || return 2 + rm $MOUNT1/$tfile || return 3 + + return 0 +} +run_test 8 "replay of resent request" + +test_9() { + replay_barrier mds + 
mcreate $MOUNT1/$tfile-1 + mcreate $MOUNT2/$tfile-2 + # drop first reint reply + sysctl -w lustre.fail_loc=0x80000119 + fail mds + sysctl -w lustre.fail_loc=0 + + rm $MOUNT1/$tfile-[1,2] || return 1 + + return 0 +} +run_test 9 "resending a replayed create" + +test_10() { + mcreate $MOUNT1/$tfile-1 + replay_barrier mds + munlink $MOUNT1/$tfile-1 + mcreate $MOUNT2/$tfile-2 + # drop first reint reply + sysctl -w lustre.fail_loc=0x80000119 + fail mds + sysctl -w lustre.fail_loc=0 + + checkstat $MOUNT1/$tfile-1 && return 1 + checkstat $MOUNT1/$tfile-2 || return 2 + rm $MOUNT1/$tfile-2 + + return 0 +} +run_test 10 "resending a replayed unlink" + +test_11() { + replay_barrier mds + mcreate $MOUNT1/$tfile-1 + mcreate $MOUNT2/$tfile-2 + mcreate $MOUNT1/$tfile-3 + mcreate $MOUNT2/$tfile-4 + mcreate $MOUNT1/$tfile-5 + # drop all reint replies for a while + sysctl -w lustre.fail_loc=0x0119 + facet_failover mds + # sleep for a while to let both clients reconnect and time out + sleep $((TIMEOUT * 2)) + sysctl -w lustre.fail_loc=0 + + rm $MOUNT1/$tfile-[1-5] || return 1 + + return 0 +} +run_test 11 "both clients timeout during replay" + +test_12() { + replay_barrier mds + + multiop $DIR/$tfile mo_c & + MULTIPID=$! 
+ sleep 5 + + replay_barrier mds + + kill -USR1 $MULTIPID || return 3 + wait $MULTIPID || return 4 + + # drop close + sysctl -w lustre.fail_loc=0x80000115 + facet_failover mds + df $MOUNT || return 1 + sysctl -w lustre.fail_loc=0 + + ls $DIR/$tfile + $CHECKSTAT -t file $DIR/$tfile || return 2 + rm $DIR/$tfile + + return 0 +} +run_test 13 "close resend timeout" + +test_7() { mcreate $MOUNT1/a multiop $MOUNT2/a o_c & pid1=$!