LU-5079 ptlrpc: fix early reply timeout for recovery

author Alexander Boyko <alexander_boyko@xyratex.com>

Wed, 29 Oct 2014 21:59:12 +0000 (14:59 -0700)

committer Oleg Drokin <oleg.drokin@intel.com>

Thu, 6 Nov 2014 18:36:48 +0000 (18:36 +0000)
author Alexander Boyko <alexander_boyko@xyratex.com>
Wed, 29 Oct 2014 21:59:12 +0000 (14:59 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 6 Nov 2014 18:36:48 +0000 (18:36 +0000)
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h

index ace96fa..a43bfbd 100644 (file)
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -419,6 +419,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
  #define OBD_FAIL_TGT_LAST_REPLAY         0x710
  #define OBD_FAIL_TGT_CLIENT_ADD          0x711
  #define OBD_FAIL_TGT_RCVG_FLAG           0x712
+#define OBD_FAIL_TGT_REPLAY_DELAY2      0x714
  
  #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
  #define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index eb8a58b..bfae4cc 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1829,17 +1829,19 @@ repeat:
  
  static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd)
  {
-        struct ptlrpc_request *req = NULL;
-        ENTRY;
+       struct ptlrpc_request *req = NULL;
+       ENTRY;
  
-        CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
-               obd->obd_next_recovery_transno);
+       CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
+               obd->obd_next_recovery_transno);
  
-        if (target_recovery_overseer(obd, check_for_next_transno,
-                                     exp_req_replay_healthy)) {
-                abort_req_replay_queue(obd);
-                abort_lock_replay_queue(obd);
-        }
+       CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+
+       if (target_recovery_overseer(obd, check_for_next_transno,
+                                    exp_req_replay_healthy)) {
+               abort_req_replay_queue(obd);
+               abort_lock_replay_queue(obd);
+       }
  
         spin_lock(&obd->obd_recovery_task_lock);
         if (!cfs_list_empty(&obd->obd_req_replay_queue)) {
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index a74d51d..22bc111 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -1313,16 +1313,18 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                  RETURN(-ENOSYS);
          }
  
-        if (req->rq_export &&
-            lustre_msg_get_flags(req->rq_reqmsg) &
-            (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
-                /* During recovery, we don't want to send too many early
-                 * replies, but on the other hand we want to make sure the
-                 * client has enough time to resend if the rpc is lost. So
-                 * during the recovery period send at least 4 early replies,
-                 * spacing them every at_extra if we can. at_estimate should
-                 * always equal this fixed value during recovery. */
-               at_measured(&svcpt->scp_at_estimate, min(at_extra,
+       if (req->rq_export &&
+           lustre_msg_get_flags(req->rq_reqmsg) &
+           (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+               /* During recovery, we don't want to send too many early
+                * replies, but on the other hand we want to make sure the
+                * client has enough time to resend if the rpc is lost. So
+                * during the recovery period send at least 4 early replies,
+                * spacing them every at_extra if we can. at_estimate should
+                * always equal this fixed value during recovery. */
+               at_measured(&svcpt->scp_at_estimate,
+                           cfs_time_current_sec() -
+                           req->rq_arrival_time.tv_sec + min(at_extra,
                             req->rq_export->exp_obd->obd_recovery_timeout / 4));
         } else {
                 /* We want to extend the request deadline by at_extra seconds,
@@ -1336,17 +1338,17 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                             cfs_time_current_sec() -
                             req->rq_arrival_time.tv_sec);
  
-               /* Check to see if we've actually increased the deadline -
-                * we may be past adaptive_max */
-               if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
-                   at_get(&svcpt->scp_at_estimate)) {
-                       DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
-                                 "(%ld/%ld), not sending early reply\n",
-                                 olddl, req->rq_arrival_time.tv_sec +
-                                 at_get(&svcpt->scp_at_estimate) -
-                                 cfs_time_current_sec());
-                       RETURN(-ETIMEDOUT);
-               }
+       }
+       /* Check to see if we've actually increased the deadline -
+        * we may be past adaptive_max */
+       if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+           at_get(&svcpt->scp_at_estimate)) {
+               DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
+                         "(%ld/%ld), not sending early reply\n",
+                         olddl, req->rq_arrival_time.tv_sec +
+                         at_get(&svcpt->scp_at_estimate) -
+                         cfs_time_current_sec());
+               RETURN(-ETIMEDOUT);
         }
  
         reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh

index 42c39fd..52d894e 100755 (executable)
--- a/lustre/tests/replay-ost-single.sh
+++ b/lustre/tests/replay-ost-single.sh
@@ -396,6 +396,34 @@ test_8e() {
  }
  run_test 8e "Verify that ptlrpc resends request on -EINPROGRESS"
  
+test_9() {
+       local server_version=$(lustre_version_code ost1)
+
+       [[ $server_version -ge $(version_code 2.6.55) ]] ||
+               [[ $server_version -ge $(version_code 2.5.27) &&
+                  $server_version -lt $(version_code 2.5.50) ]] ||
+               [[ $server_version -ge $(version_code 2.5.3) &&
+                  $server_version -lt $(version_code 2.5.11) ]] ||
+               { skip "Need OST version 2.5.4+ or 2.5.27+ or 2.6.55+"; return; }
+
+       $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+       replay_barrier ost1
+       # do IO
+       dd if=/dev/zero of=$DIR/$tfile count=1 bs=1M > /dev/null ||
+               error "failed to write"
+       # failover, replay and resend replayed waiting request
+       #define OBD_FAIL_TGT_REPLAY_DELAY2       0x714
+       do_facet ost1 $LCTL set_param fail_loc=0x00000714
+       do_facet ost1 $LCTL set_param fail_val=$TIMEOUT
+       fail ost1
+       do_facet ost1 $LCTL set_param fail_loc=0
+       do_facet ost1 "dmesg | tail -n 100" |\
+               sed -n '/no req deadline/,$ p' | grep -q 'Already past' &&
+               return 1
+       return 0
+}
+run_test 9 "Verify that no req deadline happened during recovery"
+
  complete $SECONDS
  check_and_cleanup_lustre
  exit_status
diff --git a/lustre/tests/replay-vbr.sh b/lustre/tests/replay-vbr.sh

index 4018a9b..f633a0d 100644 (file)
--- a/lustre/tests/replay-vbr.sh
+++ b/lustre/tests/replay-vbr.sh
@@ -711,6 +711,12 @@ test_7_cycle() {
      do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
      do_facet $SINGLEMDS "$LCTL set_param mdt.${!var}.commit_on_sharing=0"
  
+       local at_max_saved=0
+       if at_is_enabled; then
+               at_max_saved=$(at_max_get $SINGLEMDS)
+               at_max_set $TIMEOUT mds client
+       fi
+
      do_node $CLIENT1 mkdir -p $DIR/$tdir
      replay_barrier $SINGLEMDS
      # first operation
@@ -727,11 +733,12 @@ test_7_cycle() {
      # should fail as conflict expected
      client_evicted $CLIENT1 || rc=1
  
-    wait_recovery_complete $SINGLEMDS
+       wait_recovery_complete $SINGLEMDS
         wait_mds_ost_sync || error "wait_mds_ost_sync failed"
  
-    rm -rf $DIR/$tdir
-    return $rc
+       [[ $at_max_saved -eq 0 ]] || at_max_set $at_max_saved mds client
+       rm -rf $DIR/$tdir
+       return $rc
  }
  
  test_7a() {
author	Alexander Boyko <alexander_boyko@xyratex.com>
	Wed, 29 Oct 2014 21:59:12 +0000 (14:59 -0700)
committer	Oleg Drokin <oleg.drokin@intel.com>
	Thu, 6 Nov 2014 18:36:48 +0000 (18:36 +0000)
lustre/include/obd_support.h		patch \| blob \| history
lustre/ldlm/ldlm_lib.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history
lustre/tests/replay-ost-single.sh		patch \| blob \| history
lustre/tests/replay-vbr.sh		patch \| blob \| history