Whamcloud - gitweb
LU-5079 ptlrpc: fix early reply timeout for recovery 13/11213/11
author Alexander.Boyko <alexander_boyko@xyratex.com>
Thu, 24 Jul 2014 14:35:44 +0000 (18:35 +0400)
committer Andreas Dilger <andreas.dilger@intel.com>
Tue, 21 Oct 2014 18:06:38 +0000 (18:06 +0000)
Commit 8b2f9c0e408 http://review.whamcloud.com/9100 changed
the deadline calculation from current time to request arrival.
During recovery, the new deadline could be earlier than the one
calculated at request arrival time. Even worse, the deadline may
be in the past. In the first case, an unneeded early reply would be
sent to the client. In the second, client requests would be dropped
by timeout and the client would reconnect.

Do at_measured() for recovery in the same way as for a general early
reply, based on the current time, and set the new timeout to the end
of recovery.

Add a test to check for the recovery deadline bug.

Signed-off-by: Alexander Boyko <alexander_boyko@xyratex.com>
Change-Id: I29327cb962d1c1a3cd8a6181d27a29593d1d8fc4
Xyratex-bug-id: MRP-1988
Reviewed-on: http://review.whamcloud.com/11213
Tested-by: Jenkins
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Chris Horn <hornc@cray.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
lustre/include/obd_support.h
lustre/ldlm/ldlm_lib.c
lustre/ptlrpc/service.c
lustre/tests/replay-ost-single.sh

index ff61ede..3257997 100644 (file)
@@ -417,6 +417,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_TGT_CLIENT_ADD          0x711
 #define OBD_FAIL_TGT_RCVG_FLAG           0x712
 #define OBD_FAIL_TGT_DELAY_CONDITIONAL  0x713
+#define OBD_FAIL_TGT_REPLAY_DELAY2       0x714
 
 #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
 #define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
index 46ee0e8..0c878dc 100644 (file)
@@ -1806,17 +1806,19 @@ repeat:
 
 static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd)
 {
-        struct ptlrpc_request *req = NULL;
-        ENTRY;
+       struct ptlrpc_request *req = NULL;
+       ENTRY;
 
-        CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
-               obd->obd_next_recovery_transno);
+       CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
+               obd->obd_next_recovery_transno);
 
-        if (target_recovery_overseer(obd, check_for_next_transno,
-                                     exp_req_replay_healthy)) {
-                abort_req_replay_queue(obd);
-                abort_lock_replay_queue(obd);
-        }
+       CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+
+       if (target_recovery_overseer(obd, check_for_next_transno,
+                                    exp_req_replay_healthy)) {
+               abort_req_replay_queue(obd);
+               abort_lock_replay_queue(obd);
+       }
 
        spin_lock(&obd->obd_recovery_task_lock);
        if (!list_empty(&obd->obd_req_replay_queue)) {
index 71e4804..706b042 100644 (file)
@@ -1293,16 +1293,18 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                 RETURN(-ENOSYS);
         }
 
-        if (req->rq_export &&
-            lustre_msg_get_flags(req->rq_reqmsg) &
-            (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
-                /* During recovery, we don't want to send too many early
-                 * replies, but on the other hand we want to make sure the
-                 * client has enough time to resend if the rpc is lost. So
-                 * during the recovery period send at least 4 early replies,
-                 * spacing them every at_extra if we can. at_estimate should
-                 * always equal this fixed value during recovery. */
-               at_measured(&svcpt->scp_at_estimate, min(at_extra,
+       if (req->rq_export &&
+           lustre_msg_get_flags(req->rq_reqmsg) &
+           (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+               /* During recovery, we don't want to send too many early
+                * replies, but on the other hand we want to make sure the
+                * client has enough time to resend if the rpc is lost. So
+                * during the recovery period send at least 4 early replies,
+                * spacing them every at_extra if we can. at_estimate should
+                * always equal this fixed value during recovery. */
+               at_measured(&svcpt->scp_at_estimate,
+                           cfs_time_current_sec() -
+                           req->rq_arrival_time.tv_sec + min(at_extra,
                            req->rq_export->exp_obd->obd_recovery_timeout / 4));
        } else {
                /* We want to extend the request deadline by at_extra seconds,
@@ -1316,17 +1318,17 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                            cfs_time_current_sec() -
                            req->rq_arrival_time.tv_sec);
 
-               /* Check to see if we've actually increased the deadline -
-                * we may be past adaptive_max */
-               if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
-                   at_get(&svcpt->scp_at_estimate)) {
-                       DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
-                                 "(%ld/%ld), not sending early reply\n",
-                                 olddl, req->rq_arrival_time.tv_sec +
-                                 at_get(&svcpt->scp_at_estimate) -
-                                 cfs_time_current_sec());
-                       RETURN(-ETIMEDOUT);
-               }
+       }
+       /* Check to see if we've actually increased the deadline -
+        * we may be past adaptive_max */
+       if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+           at_get(&svcpt->scp_at_estimate)) {
+               DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
+                         "(%ld/%ld), not sending early reply\n",
+                         olddl, req->rq_arrival_time.tv_sec +
+                         at_get(&svcpt->scp_at_estimate) -
+                         cfs_time_current_sec());
+               RETURN(-ETIMEDOUT);
        }
 
        reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
index 42c39fd..74d0d24 100755 (executable)
@@ -396,6 +396,27 @@ test_8e() {
 }
 run_test 8e "Verify that ptlrpc resends request on -EINPROGRESS"
 
+test_9() {
+       [ $(lustre_version_code ost1) -ge $(version_code 2.6.54) ] ||
+               { skip "Need OST version at least 2.6.54"; return; }
+       $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+       replay_barrier ost1
+       # do IO
+       dd if=/dev/zero of=$DIR/$tfile count=1 bs=1M > /dev/null ||
+               error "failed to write"
+       # failover, replay and resend replayed waiting request
+       #define OBD_FAIL_TGT_REPLAY_DELAY2       0x714
+       do_facet ost1 $LCTL set_param fail_loc=0x00000714
+       do_facet ost1 $LCTL set_param fail_val=$TIMEOUT
+       fail ost1
+       do_facet ost1 $LCTL set_param fail_loc=0
+       do_facet ost1 "dmesg | tail -n 100" |\
+               sed -n '/no req deadline/,$ p' | grep -q 'Already past' &&
+               return 1
+       return 0
+}
+run_test 9 "Verify that no req deadline happened during recovery"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status