LU-5079 ptlrpc: fix early reply timeout for recovery

author Alexander.Boyko <alexander_boyko@xyratex.com>

Thu, 24 Jul 2014 14:35:44 +0000 (18:35 +0400)

committer Andreas Dilger <andreas.dilger@intel.com>

Tue, 21 Oct 2014 18:06:38 +0000 (18:06 +0000)
author Alexander.Boyko <alexander_boyko@xyratex.com>
Thu, 24 Jul 2014 14:35:44 +0000 (18:35 +0400)
committer Andreas Dilger <andreas.dilger@intel.com>
Tue, 21 Oct 2014 18:06:38 +0000 (18:06 +0000)
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h

index ff61ede..3257997 100644 (file)
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -417,6 +417,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
  #define OBD_FAIL_TGT_CLIENT_ADD          0x711
  #define OBD_FAIL_TGT_RCVG_FLAG           0x712
  #define OBD_FAIL_TGT_DELAY_CONDITIONAL  0x713
+#define OBD_FAIL_TGT_REPLAY_DELAY2       0x714
  
  #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
  #define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index 46ee0e8..0c878dc 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1806,17 +1806,19 @@ repeat:
  
  static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd)
  {
-        struct ptlrpc_request *req = NULL;
-        ENTRY;
+       struct ptlrpc_request *req = NULL;
+       ENTRY;
  
-        CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
-               obd->obd_next_recovery_transno);
+       CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
+               obd->obd_next_recovery_transno);
  
-        if (target_recovery_overseer(obd, check_for_next_transno,
-                                     exp_req_replay_healthy)) {
-                abort_req_replay_queue(obd);
-                abort_lock_replay_queue(obd);
-        }
+       CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+
+       if (target_recovery_overseer(obd, check_for_next_transno,
+                                    exp_req_replay_healthy)) {
+               abort_req_replay_queue(obd);
+               abort_lock_replay_queue(obd);
+       }
  
         spin_lock(&obd->obd_recovery_task_lock);
         if (!list_empty(&obd->obd_req_replay_queue)) {
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index 71e4804..706b042 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -1293,16 +1293,18 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                  RETURN(-ENOSYS);
          }
  
-        if (req->rq_export &&
-            lustre_msg_get_flags(req->rq_reqmsg) &
-            (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
-                /* During recovery, we don't want to send too many early
-                 * replies, but on the other hand we want to make sure the
-                 * client has enough time to resend if the rpc is lost. So
-                 * during the recovery period send at least 4 early replies,
-                 * spacing them every at_extra if we can. at_estimate should
-                 * always equal this fixed value during recovery. */
-               at_measured(&svcpt->scp_at_estimate, min(at_extra,
+       if (req->rq_export &&
+           lustre_msg_get_flags(req->rq_reqmsg) &
+           (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+               /* During recovery, we don't want to send too many early
+                * replies, but on the other hand we want to make sure the
+                * client has enough time to resend if the rpc is lost. So
+                * during the recovery period send at least 4 early replies,
+                * spacing them every at_extra if we can: the new estimate is
+                * the time elapsed since arrival plus that fixed increment. */
+               at_measured(&svcpt->scp_at_estimate,
+                           cfs_time_current_sec() -
+                           req->rq_arrival_time.tv_sec + min(at_extra,
                             req->rq_export->exp_obd->obd_recovery_timeout / 4));
         } else {
                 /* We want to extend the request deadline by at_extra seconds,
@@ -1316,17 +1318,17 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                             cfs_time_current_sec() -
                             req->rq_arrival_time.tv_sec);
  
-               /* Check to see if we've actually increased the deadline -
-                * we may be past adaptive_max */
-               if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
-                   at_get(&svcpt->scp_at_estimate)) {
-                       DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
-                                 "(%ld/%ld), not sending early reply\n",
-                                 olddl, req->rq_arrival_time.tv_sec +
-                                 at_get(&svcpt->scp_at_estimate) -
-                                 cfs_time_current_sec());
-                       RETURN(-ETIMEDOUT);
-               }
+       }
+       /* Check to see if we've actually increased the deadline -
+        * we may be past adaptive_max */
+       if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+           at_get(&svcpt->scp_at_estimate)) {
+               DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
+                         "(%ld/%ld), not sending early reply\n",
+                         olddl, req->rq_arrival_time.tv_sec +
+                         at_get(&svcpt->scp_at_estimate) -
+                         cfs_time_current_sec());
+               RETURN(-ETIMEDOUT);
         }
  
         reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh

index 42c39fd..74d0d24 100755 (executable)
--- a/lustre/tests/replay-ost-single.sh
+++ b/lustre/tests/replay-ost-single.sh
@@ -396,6 +396,27 @@ test_8e() {
  }
  run_test 8e "Verify that ptlrpc resends request on -EINPROGRESS"
  
+test_9() {
+       [ $(lustre_version_code ost1) -ge $(version_code 2.6.54) ] ||
+               { skip "Need OST version at least 2.6.54"; return; }
+       $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+       replay_barrier ost1
+       # do IO
+       dd if=/dev/zero of=$DIR/$tfile count=1 bs=1M > /dev/null ||
+               error "failed to write"
+       # failover, replay and resend replayed waiting request
+       #define OBD_FAIL_TGT_REPLAY_DELAY2       0x714
+       do_facet ost1 $LCTL set_param fail_loc=0x00000714
+       do_facet ost1 $LCTL set_param fail_val=$TIMEOUT
+       fail ost1
+       do_facet ost1 $LCTL set_param fail_loc=0
+       do_facet ost1 "dmesg | tail -n 100" |\
+               sed -n '/no req deadline/,$ p' | grep -q 'Already past' &&
+               return 1
+       return 0
+}
+run_test 9 "Verify that no req deadline happened during recovery"
+
  complete $SECONDS
  check_and_cleanup_lustre
  exit_status
author	Alexander.Boyko <alexander_boyko@xyratex.com>
	Thu, 24 Jul 2014 14:35:44 +0000 (18:35 +0400)
committer	Andreas Dilger <andreas.dilger@intel.com>
	Tue, 21 Oct 2014 18:06:38 +0000 (18:06 +0000)
lustre/include/obd_support.h		patch | blob | history
lustre/ldlm/ldlm_lib.c		patch | blob | history
lustre/ptlrpc/service.c		patch | blob | history
lustre/tests/replay-ost-single.sh		patch | blob | history