b=13669

author nathan <nathan>

Thu, 20 Sep 2007 17:04:50 +0000 (17:04 +0000)

committer nathan <nathan>

Thu, 20 Sep 2007 17:04:50 +0000 (17:04 +0000)
author nathan <nathan>
Thu, 20 Sep 2007 17:04:50 +0000 (17:04 +0000)
committer nathan <nathan>
Thu, 20 Sep 2007 17:04:50 +0000 (17:04 +0000)
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h

index b4bbad6..8bd5753 100644 (file)
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -170,6 +170,7 @@ extern unsigned int obd_alloc_fail_rate;
  #define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
  #define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
  #define OBD_FAIL_OST_PAUSE_CREATE        0x223
+#define OBD_FAIL_OST_BRW_PAUSE_PACK      0x224
  
  #define OBD_FAIL_LDLM                    0x300
  #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c

index be608a6..a268d7e 100644 (file)
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -986,6 +986,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
          rc = lustre_pack_reply(req, 3, size, NULL);
          if (rc != 0)
                  GOTO(out, rc);
+        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_PACK, obd_fail_val);
          rcs = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
                               niocount * sizeof(*rcs));
  
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index e69a5f1..85e23ed 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -1052,14 +1052,14 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
  
                          spin_lock(&req->rq_lock);
  
-                        /* Still waiting for a reply? */
-                        if (req->rq_receiving_reply) {
+                        if (req->rq_early) {
+                                ptlrpc_at_recv_early_reply(req);
                                  spin_unlock(&req->rq_lock);
                                  continue;
                          }
-                        
-                        if (req->rq_early) {
-                                ptlrpc_at_recv_early_reply(req);
+
+                        /* Still waiting for a reply? */
+                        if (req->rq_receiving_reply) {
                                  spin_unlock(&req->rq_lock);
                                  continue;
                          }
diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c

index 29f4f36..9d88451 100644 (file)
--- a/lustre/ptlrpc/pack_generic.c
+++ b/lustre/ptlrpc/pack_generic.c
@@ -347,9 +347,7 @@ static int lustre_pack_reply_v1(struct ptlrpc_request *req, int count,
          ENTRY;
  
          LASSERT(req->rq_reply_state == NULL);
-        if (req->rq_packed_final)
-                /* Already packed final, no more early */
-                RETURN(-EALREADY);
+
          if ((flags & LPRFL_EARLY_REPLY) == 0)
                  req->rq_packed_final = 1;
  
@@ -390,10 +388,7 @@ static int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
          ENTRY;
  
          LASSERT(req->rq_reply_state == NULL);
-        if (req->rq_packed_final) {
-                /* Already packed final, no more early */
-                RETURN(-EALREADY);
-        }
+
          if ((flags & LPRFL_EARLY_REPLY) == 0)
                  req->rq_packed_final = 1;
  
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index 2cf34b0..09fd201 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -262,7 +262,7 @@ ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc)
  static void ptlrpc_at_timer(unsigned long castmeharder)
  {
          struct ptlrpc_service *svc = (struct ptlrpc_service *)castmeharder;
-        CDEBUG(D_ADAPTTO, "at timer %s hit at %ld%s\n",
+        CDEBUG(D_INFO, "at timer %s hit at %ld%s\n",
                 svc->srv_name, cfs_time_current_sec(), 
                 list_empty(&svc->srv_at_list) ? ", empty" : ""); 
          svc->srv_at_check = 1;
@@ -634,7 +634,8 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
          struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
          struct ptlrpc_request *reqcopy;
          struct lustre_msg *reqmsg;
-        long deadline = req->rq_deadline - cfs_time_current_sec();
+        long olddl = req->rq_deadline - cfs_time_current_sec();
+        time_t newdl;
          int rc;
          ENTRY;
                              
@@ -643,25 +644,43 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
          DEBUG_REQ(D_ADAPTTO, req, 
                    "%ssending early reply (deadline %+lds, margin %+lds) for "
                    "%d+%d", AT_OFF ? "AT off - not " : "",
-                  deadline, deadline - at_get(&svc->srv_at_estimate), 
+                  olddl, olddl - at_get(&svc->srv_at_estimate),
                    at_get(&svc->srv_at_estimate), extra_time);
  
          if (AT_OFF) 
                  RETURN(0);
          
-        if (deadline < 0) {
-                CERROR("Already past deadline (%+lds), not sending early "
-                       "reply\n", deadline);
+        if (olddl < 0) {
+                CDEBUG(D_ADAPTTO, "x"LPU64": Already past deadline (%+lds), not"
+                       " sending early reply\n", req->rq_xid, olddl);
                  /* Return an error so we're not re-added to the timed list. */
                  RETURN(-ETIMEDOUT);
          }
  
          if ((lustre_msg_get_flags(req->rq_reqmsg) & MSG_AT_SUPPORT) == 0) {
-                CERROR("Wanted to ask client for more time, but no AT "
-                       "support\n");
+                CDEBUG(D_INFO, "Wanted to ask client for more time, but no AT "
+                      "support\n");
                  RETURN(-ENOSYS);
          }
  
+        if (extra_time) {
+                /* Fake our processing time into the future to ask the
+                   clients for some extra amount of time */
+                extra_time += cfs_time_current_sec() -
+                        req->rq_arrival_time.tv_sec;
+                at_add(&svc->srv_at_estimate, extra_time);
+        }
+
+        newdl = req->rq_arrival_time.tv_sec + at_get(&svc->srv_at_estimate);
+        if (req->rq_deadline >= newdl) {
+                /* We're not adding any time, no need to send an early reply
+                   (e.g. maybe at adaptive_max) */
+                CDEBUG(D_ADAPTTO, "x"LPU64": Couldn't add any time (%ld/%ld), "
+                       "not sending early reply\n", req->rq_xid, olddl,
+                       newdl - cfs_time_current_sec());
+                RETURN(-ETIMEDOUT);
+        }
+
          OBD_ALLOC(reqcopy, sizeof *reqcopy);
          if (reqcopy == NULL)
                  RETURN(-ENOMEM);
@@ -672,7 +691,9 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
          }
          
          *reqcopy = *req;
-        /* We need the reqmsg for the magic */
+        reqcopy->rq_reply_state = NULL;
+        reqcopy->rq_rep_swab_mask = 0;
+        /* We only need the reqmsg for the magic */
          reqcopy->rq_reqmsg = reqmsg;
          memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
  
@@ -680,20 +701,11 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
          if (rc) 
                  GOTO(out, rc);
  
-        if (extra_time) {
-                /* Fake our processing time into the future to ask the 
-                   clients for some extra amount of time */
-                extra_time += cfs_time_current_sec() -
-                        reqcopy->rq_arrival_time.tv_sec;
-                at_add(&svc->srv_at_estimate, extra_time);
-        }
-
          rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY);
  
          if (!rc) {
                  /* Adjust our own deadline to what we told the client */
-                req->rq_deadline = req->rq_arrival_time.tv_sec + 
-                        at_get(&svc->srv_at_estimate);
+                req->rq_deadline = newdl;
                  req->rq_early_count++; /* number sent, server side */
          } else {
                  DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
@@ -778,8 +790,7 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service *svc)
                     deleted, and is safe to take a ref to keep the req around */
                  atomic_inc(&rq->rq_refcount);
                  spin_unlock(&svc->srv_at_lock);
-                if (!rq->rq_packed_final && 
-                    (ptlrpc_at_send_early_reply(rq, at_extra) == 0)) {
+                if (ptlrpc_at_send_early_reply(rq, at_extra) == 0) {
                          counter++;
                          ptlrpc_at_add_timed(rq);
                  }
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh

index e26ce10..39e05f0 100755 (executable)
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -40,7 +40,8 @@ run_test 3 "stat: drop req, drop rep"
  
  SAMPLE_NAME=recovery-small.junk
  SAMPLE_FILE=$TMP/$SAMPLE_NAME
-dd if=/dev/urandom of=$SAMPLE_FILE bs=1K count=4
+# make this big, else test 9 doesn't wait for bulk -- bz 5595
+dd if=/dev/urandom of=$SAMPLE_FILE bs=1M count=4
  
  test_4() {
      do_facet client "cp $SAMPLE_FILE $MOUNT/$SAMPLE_NAME" || return 1
@@ -195,6 +196,10 @@ test_16() {
  run_test 16 "timeout bulk put, don't evict client (2732)"
  
  test_17() {
+    # With adaptive timeouts, bulk_get won't expire until adaptive_timeout_max
+    OST_AT_MAX=$(do_facet ost1 sysctl -n lustre.adaptive_max)
+    do_facet ost1 sysctl -w lustre.adaptive_max=$TIMEOUT
+
      # OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE
      # OST bulk will time out here, client retries
      do_facet ost1 sysctl -w lustre.fail_loc=0x80000503
@@ -202,12 +207,16 @@ test_17() {
      do_facet client cp $SAMPLE_FILE $DIR/$tfile
      sync
  
-    sleep $TIMEOUT
+    # with AT, client will wait adaptive_max*factor+net_latency before
+    # expiring the req, hopefully timeout*2 is enough
+    sleep $(($TIMEOUT*2))
+
      do_facet ost1 sysctl -w lustre.fail_loc=0
      do_facet client "df $DIR"
      # expect cmp to succeed, client resent bulk
      do_facet client "cmp $SAMPLE_FILE $DIR/$tfile" || return 3
      do_facet client "rm $DIR/$tfile" || return 4
+    do_facet ost1 sysctl -w lustre.adaptive_max=$OST_AT_MAX
      return 0
  }
  run_test 17 "timeout bulk get, don't evict client (2732)"
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh

index 86f53e0..a1371a5 100755 (executable)
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -1188,7 +1188,7 @@ at_start() #bug 3055
      fi
  }
  
-test_65() #bug 3055
+test_65a() #bug 3055
  {
      at_start
      $LCTL dk > /dev/null
@@ -1200,12 +1200,42 @@ test_65() #bug 3055
      unlinkmany $DIR/$tfile 10 > /dev/null
      # check for log message
      $LCTL dk | grep "Early reply #" || error "No early reply" 
-    # client should show 30s timeouts
+    # client should show 30s estimates
      grep portal $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts
      sleep 9
      grep portal $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts
  }
-run_test 65 "AT: verify early replies"
+run_test 65a "AT: verify early replies"
+
+test_65b() #bug 3055
+{
+    at_start
+    # turn on D_ADAPTTO
+    debugsave
+    sysctl -w lnet.debug="+other"
+    $LCTL dk > /dev/null
+    # slow down bulk i/o
+    do_facet ost1 sysctl -w lustre.fail_val=30
+#define OBD_FAIL_OST_BRW_PAUSE_PACK      0x224
+    do_facet ost1 sysctl -w lustre.fail_loc=0x224
+
+    rm -f $DIR/$tfile
+    lfs setstripe $DIR/$tfile --index=0 --count=1
+    # force some real bulk transfer
+    dd if=/dev/urandom of=$TMP/big bs=1M count=4
+    cp $TMP/big $DIR/$tfile
+    echo "append" >> $DIR/$tfile
+    cat $DIR/$tfile >> /dev/null
+    rm $TMP/big
+
+    do_facet ost1 sysctl -w lustre.fail_loc=0
+    # check for log message
+    $LCTL dk | grep "Early reply #" || error "No early reply"
+    debugrestore
+    # client should show 30s estimates
+    grep portal $LPROC/osc/${FSNAME}-OST0000-osc-*/timeouts
+}
+run_test 65b "AT: verify early replies on packed reply / bulk"
  
  test_66a() #bug 3055
  {
author	nathan <nathan>
	Thu, 20 Sep 2007 17:04:50 +0000 (17:04 +0000)
committer	nathan <nathan>
	Thu, 20 Sep 2007 17:04:50 +0000 (17:04 +0000)
lustre/include/obd_support.h		patch \| blob \| history
lustre/ost/ost_handler.c		patch \| blob \| history
lustre/ptlrpc/client.c		patch \| blob \| history
lustre/ptlrpc/pack_generic.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history
lustre/tests/recovery-small.sh		patch \| blob \| history
lustre/tests/replay-single.sh		patch \| blob \| history