From: uid723 <uid723>
Date: Wed, 16 Jun 2004 07:33:21 +0000 (+0000)
Subject: b=1742
X-Git-Tag: 1.3.4~736
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=da823769e09aa247eb0b7533a66639f3cab9d9e7;p=fs%2Flustre-release.git

b=1742

- landed on HEAD
---

diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 11431c9..b6ae61c 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -499,11 +499,6 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         if (rc && rc != EALREADY)
                 GOTO(out, rc);
 
-        /* XXX track this all the time? */
-        if (target->obd_recovering) {
-                target->obd_connected_clients++;
-        }
-
         req->rq_repmsg->handle = conn;
 
         /* If the client and the server are the same node, we will already
@@ -547,6 +542,10 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                 GOTO(out, rc = 0);
         }
 
+        if (target->obd_recovering) {
+                target->obd_connected_clients++;
+        }
+
         memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof conn),
                sizeof conn);
 
@@ -742,6 +741,9 @@ static int check_for_next_transno(struct obd_device *obd)
         queue_len = obd->obd_requests_queued_for_recovery;
         next_transno = obd->obd_next_recovery_transno;
 
+        CDEBUG(D_HA,"max: %d, connected: %d, completed: %d, queue_len: %d, "
+               "req_transno: "LPU64", next_transno: "LPU64"\n",
+               max, connected, completed, queue_len, req_transno, next_transno);
         if (obd->obd_abort_recovery) {
                 CDEBUG(D_HA, "waking for aborted recovery\n");
                 wake_up = 1;
@@ -855,6 +857,9 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
          * Also, if this request has a transno less than the one we're waiting
          * for, we should process it now.  It could (and currently always will)
          * be an open request for a descriptor that was opened some time ago.
+         *
+         * Also, a resent, replayed request that has already been
+         * handled will pass through here and be processed immediately.
          */
         if (obd->obd_processing_task == current->pid ||
             transno < obd->obd_next_recovery_transno) {
@@ -866,6 +871,17 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
                 return 1;
         }
 
+        /* A resent, replayed request that is still on the queue; just drop it.
+           The queued request will handle this. */
+        if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT | MSG_REPLAY)) ==
+            (MSG_RESENT | MSG_REPLAY)) {
+                DEBUG_REQ(D_ERROR, req, "dropping resent queued req");
+                spin_unlock_bh(&obd->obd_processing_task_lock);
+                OBD_FREE(reqmsg, req->rq_reqlen);
+                OBD_FREE(saved_req, sizeof *saved_req);
+                return 0;
+        }
+
         memcpy(saved_req, req, sizeof *req);
         memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
         req = saved_req;
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 5481a8f..452f3ed 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -1263,6 +1263,11 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
 
         LASSERT_SPIN_LOCKED(&imp->imp_lock);
 
+        /* clear this  for new requests that were resent as well
+           as resent replayed requests. */
+        lustre_msg_clear_flags(req->rq_reqmsg,
+                             MSG_RESENT);
+
         /* don't re-add requests that have been replayed */
         if (!list_empty(&req->rq_replay_list))
                 return;
@@ -1581,16 +1586,8 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
         aa->praa_old_state = req->rq_send_state;
         req->rq_send_state = LUSTRE_IMP_REPLAY;
         req->rq_phase = RQ_PHASE_NEW;
-        /*
-         * Q: "How can a req get on the replay list if it wasn't replied?"
-         * A: "If we failed during the replay of this request, it will still
-         *     be on the list, but rq_replied will have been reset to 0."
-         */
-        if (req->rq_replied) {
-                aa->praa_old_status = req->rq_repmsg->status;
-                req->rq_status = 0;
-                req->rq_replied = 0;
-        }
+        aa->praa_old_status = req->rq_repmsg->status;
+        req->rq_status = 0;
 
         req->rq_interpret_reply = ptlrpc_replay_interpret;
         atomic_inc(&req->rq_import->imp_replay_inflight);
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index 045896f..e2106b4 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -256,7 +256,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
         IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
 
         imp->imp_conn_cnt++;
-        imp->imp_last_replay_transno = 0;
+        imp->imp_resend_replay = 0;
 
         if (imp->imp_remote_handle.cookie == 0) {
                 initial_connect = 1;
@@ -398,13 +398,18 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                                imp->imp_connection->c_remote_uuid.uuid);
                 }
 
-                if (imp->imp_invalid)
+                if (imp->imp_invalid) {
                         IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
-                else
+                } else if (MSG_CONNECT_RECOVERING & msg_flags) {
+                        imp->imp_resend_replay = 1;
+                        IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
+                } else {
                         IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+                }
         } else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) {
                 LASSERT(imp->imp_replayable);
                 imp->imp_remote_handle = request->rq_repmsg->handle;
+                imp->imp_last_replay_transno = 0;
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
         } else {
                 imp->imp_remote_handle = request->rq_repmsg->handle;
diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c
index a719b43..6d4af7a 100644
--- a/lustre/ptlrpc/recover.c
+++ b/lustre/ptlrpc/recover.c
@@ -151,7 +151,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
 {
         int rc = 0;
         struct list_head *tmp, *pos;
-        struct ptlrpc_request *req;
+        struct ptlrpc_request *req = NULL;
         unsigned long flags;
         __u64 last_transno;
         ENTRY;
@@ -187,16 +187,34 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
          */
         list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+
+                /* If need to resend, stop on the matching one first. It's 
+                   possible though it's already been committed, so in that case 
+                   we'll just continue with replay */
+                if (imp->imp_resend_replay && 
+                    req->rq_transno == last_transno) {
+                        lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
+                        break;
+                }
+
                 if (req->rq_transno > last_transno) {
-                        rc = ptlrpc_replay_req(req);
-                        if (rc) {
-                                CERROR("recovery replay error %d for req "
-                                       LPD64"\n", rc, req->rq_xid);
-                                RETURN(rc);
-                        }
-                        *inflight = 1;
+                        imp->imp_last_replay_transno = req->rq_transno;
                         break;
                 }
+
+                req = NULL;
+        }
+
+        imp->imp_resend_replay = 0;
+
+        if (req != NULL) {
+                rc = ptlrpc_replay_req(req);
+                if (rc) {
+                        CERROR("recovery replay error %d for req "
+                               LPD64"\n", rc, req->rq_xid);
+                        RETURN(rc);
+                }
+                *inflight = 1;
         }
         RETURN(rc);
 }
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index c91837b..d702b59 100755
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -35,8 +35,8 @@ cleanup() {
         fail mds
     fi
 
-    umount $MOUNT2
-    umount $MOUNT
+    umount $MOUNT2 || true
+    umount $MOUNT || true
     rmmod llite
     stop mds ${FORCE}
     stop ost2 ${FORCE}
@@ -176,6 +176,137 @@ test_6() {
 run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2"
 
 test_7() {
+    replay_barrier mds
+    createmany -o $MOUNT1/$tfile- 25
+    createmany -o $MOUNT2/$tfile-2- 1
+    createmany -o $MOUNT1/$tfile-3- 25
+    umount $MOUNT2
+
+    facet_failover mds
+    # expect failover to fail
+    df $MOUNT && return 1
+
+#   3313 - current fix for 3313 prevents any reply here
+#    unlinkmany $MOUNT1/$tfile- 25 || return 2
+
+    zconf_mount `hostname` $MOUNT2
+    return 0
+}
+run_test 7 "timeouts waiting for lost client during replay"
+
+
+test_8() {
+    replay_barrier mds
+    drop_reint_reply "mcreate $MOUNT1/$tfile"    || return 1
+    fail mds
+    checkstat $MOUNT2/$tfile || return 2
+    rm $MOUNT1/$tfile || return 3
+
+    return 0
+}
+run_test 8 "replay of resent request"
+
+test_9() {
+    replay_barrier mds
+    mcreate $MOUNT1/$tfile-1
+    mcreate $MOUNT2/$tfile-2
+    # drop first reint reply
+    sysctl -w lustre.fail_loc=0x80000119
+    fail mds
+    sysctl -w lustre.fail_loc=0
+
+    rm $MOUNT1/$tfile-[1,2] || return 1
+
+    return 0
+}
+run_test 9 "resending a replayed create"
+
+test_10() {
+    mcreate $MOUNT1/$tfile-1
+    replay_barrier mds
+    munlink $MOUNT1/$tfile-1
+    mcreate $MOUNT2/$tfile-2
+    # drop first reint reply
+    sysctl -w lustre.fail_loc=0x80000119
+    fail mds
+    sysctl -w lustre.fail_loc=0
+
+    checkstat $MOUNT1/$tfile-1 && return 1
+    checkstat $MOUNT1/$tfile-2 || return 2
+    rm $MOUNT1/$tfile-2
+
+    return 0
+}
+run_test 10 "resending a replayed unlink"
+
+test_11() {
+    replay_barrier mds
+    mcreate $MOUNT1/$tfile-1
+    mcreate $MOUNT2/$tfile-2
+    mcreate $MOUNT1/$tfile-3
+    mcreate $MOUNT2/$tfile-4
+    mcreate $MOUNT1/$tfile-5
+    # drop all reint replies for a while
+    sysctl -w lustre.fail_loc=0x0119
+    facet_failover mds
+    #sleep for while, let both clients reconnect and timeout
+    sleep $((TIMEOUT * 2))
+    sysctl -w lustre.fail_loc=0
+
+    rm $MOUNT1/$tfile-[1-5] || return 1
+
+    return 0
+}
+run_test 11 "both clients timeout during replay"
+
+test_12() {
+    replay_barrier mds
+
+    multiop $DIR/$tfile mo_c &
+    MULTIPID=$!
+    sleep 5
+
+    # drop first enqueue
+    sysctl -w lustre.fail_loc=0x80000302
+    facet_failover mds
+    df $MOUNT || return 1
+    sysctl -w lustre.fail_loc=0
+
+    ls $DIR/$tfile
+    $CHECKSTAT -t file $DIR/$tfile || return 2
+    kill -USR1 $MULTIPID || return 3
+    wait $MULTIPID || return 4
+    rm $DIR/$tfile
+
+    return 0
+}
+run_test 12 "open resend timeout"
+
+test_13() {
+    multiop $DIR/$tfile mo_c &
+    MULTIPID=$!
+    sleep 5
+
+    replay_barrier mds
+
+    kill -USR1 $MULTIPID || return 3
+    wait $MULTIPID || return 4
+
+    # drop close 
+    sysctl -w lustre.fail_loc=0x80000115
+    facet_failover mds
+    df $MOUNT || return 1
+    sysctl -w lustre.fail_loc=0
+
+    ls $DIR/$tfile
+    $CHECKSTAT -t file $DIR/$tfile || return 2
+    rm $DIR/$tfile
+
+    return 0
+}
+run_test 13 "close resend timeout"
+
+test_7() {
     mcreate $MOUNT1/a
     multiop $MOUNT2/a o_c &
     pid1=$!