b=13537

author nathan <nathan>

Mon, 24 Sep 2007 16:53:05 +0000 (16:53 +0000)

committer nathan <nathan>

Mon, 24 Sep 2007 16:53:05 +0000 (16:53 +0000)
author nathan <nathan>
Mon, 24 Sep 2007 16:53:05 +0000 (16:53 +0000)
committer nathan <nathan>
Mon, 24 Sep 2007 16:53:05 +0000 (16:53 +0000)
diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h

index cef9318..6dc3840 100644 (file)
--- a/lustre/include/lustre_import.h
+++ b/lustre/include/lustre_import.h
@@ -13,7 +13,6 @@
  #define D_ADAPTTO D_OTHER
  #define AT_BINS 4                  /* "bin" means "N seconds of history" */
  #define AT_FLG_NOHIST 0x1          /* use last reported value only */
-#define AT_FLG_MIN    0x2          /* use a minimum limit */
  
  struct adaptive_timeout {
          time_t       at_binstart;         /* bin start time */
@@ -70,7 +69,6 @@ struct imp_at {
          int                     iat_portal[IMP_AT_MAX_PORTALS];
          struct adaptive_timeout iat_net_latency;
          struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
-        time_t                  iat_drain; /* hack to slow reconnect reqs */
  };
  
  struct obd_import {
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h

index 8bd5753..9b742d3 100644 (file)
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -38,7 +38,6 @@ extern unsigned int obd_dump_on_eviction;
     networking / disk / timings affected by load (use Adaptive Timeouts) */
  extern unsigned int obd_timeout;          /* seconds */
  extern unsigned int ldlm_timeout;         /* seconds */
-extern unsigned int adaptive_timeout_min; /* seconds */
  extern unsigned int adaptive_timeout_max; /* seconds */
  extern unsigned int adaptive_timeout_history; /* seconds */
  extern unsigned int obd_sync_filter;
@@ -60,9 +59,14 @@ extern unsigned int obd_alloc_fail_rate;
  #define PING_EVICT_TIMEOUT (PING_INTERVAL * 5 / 2)
  #define DISK_TIMEOUT 50          /* Beyond this we warn about disk speed */
  #define CONNECTION_SWITCH_MIN 5  /* Connection switching rate limiter */
+#define CONNECTION_SWITCH_MAX 50 /* Max connect interval for nonresponsive
+                                    servers; keep this within the recovery
+                                    period */
+#define CONNECTION_SWITCH_INC 5  /* Connection timeout backoff */
  #ifndef CRAY_XT3
  /* In general this should be low to have quick detection of a system 
-   running on a backup server. */
+   running on a backup server. (If it's too low, import_select_connection
+   will increase the timeout anyhow.)  */
  #define INITIAL_CONNECT_TIMEOUT max_t(int,CONNECTION_SWITCH_MIN,obd_timeout/20)
  #else
  /* ...but for very large systems (e.g. CRAY) we need to keep the initial 
@@ -71,7 +75,6 @@ extern unsigned int obd_alloc_fail_rate;
     chance to generate adaptive timeout data. */
  #define INITIAL_CONNECT_TIMEOUT max_t(int,CONNECTION_SWITCH_MIN,obd_timeout/2)
  #endif
-#define LND_TIMEOUT 50           /* LNET LND-level RPC timeout */
  #define LONG_UNLINK 300          /* Unlink should happen before now */
  
  
diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c

index 874798c..bad9801 100644 (file)
--- a/lustre/lov/lproc_lov.c
+++ b/lustre/lov/lproc_lov.c
@@ -135,7 +135,7 @@ static int lov_rd_stripecount(char *page, char **start, off_t off, int count,
          LASSERT(dev != NULL);
          desc = &dev->u.lov.desc;
          *eof = 1;
-        return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
+        return snprintf(page, count, "%d\n", desc->ld_default_stripe_count);
  }
  
  static int lov_wr_stripecount(struct file *file, const char *buffer,
diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c

index 632d7e3..eb82860 100644 (file)
--- a/lustre/obdclass/class_obd.c
+++ b/lustre/obdclass/class_obd.c
@@ -65,8 +65,6 @@ unsigned int obd_dump_on_timeout;
  unsigned int obd_dump_on_eviction;
  unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
  unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */
-/* Covers the maximum expected network latency */
-unsigned int adaptive_timeout_min = 10;           /* seconds */
  unsigned int adaptive_timeout_max = 600;          /* seconds */
  /* We remember the slowest event that took place within history */
  unsigned int adaptive_timeout_history = 600;      /* seconds */
@@ -388,7 +386,6 @@ EXPORT_SYMBOL(obd_dump_on_timeout);
  EXPORT_SYMBOL(obd_dump_on_eviction);
  EXPORT_SYMBOL(obd_timeout);
  EXPORT_SYMBOL(ldlm_timeout);
-EXPORT_SYMBOL(adaptive_timeout_min);
  EXPORT_SYMBOL(adaptive_timeout_max);
  EXPORT_SYMBOL(adaptive_timeout_history);
  EXPORT_SYMBOL(obd_max_dirty_pages);
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c

index 470cae7..81765de 100644 (file)
--- a/lustre/obdclass/genops.c
+++ b/lustre/obdclass/genops.c
@@ -793,11 +793,7 @@ EXPORT_SYMBOL(class_import_put);
  
  static void init_imp_at(struct imp_at *at) {
          int i;
-        /* We need enough time to get an early response on a slow network.
-           Since we can't say for sure how slow a network might be, we use
-           a user-defined max expected network latency. We will adapt to slow
-           increases, but a sudden jump can still kill us. */
-        at_init(&at->iat_net_latency, adaptive_timeout_min, AT_FLG_MIN);
+        at_init(&at->iat_net_latency, CONNECTION_SWITCH_INC, 0);
          for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
                  /* max service estimates are tracked on the server side, so
                     don't use the AT history here, just use the last reported
@@ -805,7 +801,6 @@ static void init_imp_at(struct imp_at *at) {
                  at_init(&at->iat_service_estimate[i], INITIAL_CONNECT_TIMEOUT,
                          AT_FLG_NOHIST);
          }
-        at->iat_drain = 0;
  }
  
  struct obd_import *class_new_import(struct obd_device *obd)
diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c

index 85cba69..0d36662 100644 (file)
--- a/lustre/obdclass/linux/linux-sysctl.c
+++ b/lustre/obdclass/linux/linux-sysctl.c
@@ -62,7 +62,6 @@ enum {
          OBD_DUMP_ON_EVICTION,   /* dump kernel debug log upon eviction */
          OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */
          OBD_ALLOC_FAIL_RATE,    /* memory allocation random failure rate */
-        ADAPTIVE_MIN,           /* Adaptive timeout lower limit */
          ADAPTIVE_MAX,           /* Adaptive timeout upper limit */
          ADAPTIVE_HISTORY,       /* Adaptive timeout timebase */
  };
@@ -199,14 +198,6 @@ static cfs_sysctl_table_t obd_table[] = {
          },
  #endif
          {
-                .ctl_name = ADAPTIVE_MIN,
-                .procname = "adaptive_min",
-                .data     = &adaptive_timeout_min,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
                  .ctl_name = ADAPTIVE_MAX,
                  .procname = "adaptive_max",
                  .data     = &adaptive_timeout_max,
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index 85e23ed..0eaddcd 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -215,14 +215,6 @@ static void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
          /* We could get even fancier here, using history to predict increased
             loading... */
               
-        if (at->iat_drain > req->rq_timeout) {
-                /* If we're trying to drain the network queues, give this 
-                   req a long timeout */
-                req->rq_timeout = at->iat_drain;
-                CDEBUG(D_ADAPTTO, "waiting %ds to let queues drain\n",
-                       req->rq_timeout);
-        }
-
          /* Let the server know what this RPC timeout is by putting it in the 
             reqmsg*/
          lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
@@ -271,7 +263,7 @@ static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req)
          
          /* Network latency is total time less server processing time */
          nl = max_t(int, now - req->rq_sent - st, 0) + 1/*st rounding*/;
-        if (st > now - req->rq_sent + 1 /* rounding */) 
+        if (st > now - req->rq_sent + 2 /* rounding */)
                  CERROR("Reported service time %u > total measured time %ld\n",
                         st, now - req->rq_sent);
  
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c

index a058f6b..327c2b7 100644 (file)
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -188,8 +188,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
          struct list_head *tmp, *n;
          struct ptlrpc_request *req;
          struct l_wait_info lwi;
-        time_t last = 0;
-        int timeout, rc = 0;
+        int rc;
  
          atomic_inc(&imp->imp_inval_count);
  
@@ -198,28 +197,16 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
  
          LASSERT(imp->imp_invalid);
  
-        /* wait for all requests to error out and call completion callbacks */
-        spin_lock(&imp->imp_lock);
-        list_for_each_safe(tmp, n, &imp->imp_sending_list) {
-                req = list_entry(tmp, struct ptlrpc_request, rq_list);
-                last = max(last, req->rq_deadline);
-        }
-        list_for_each_safe(tmp, n, &imp->imp_delayed_list) {
-                req = list_entry(tmp, struct ptlrpc_request, rq_list);
-                last = max(last, req->rq_deadline);
-        }
-        spin_unlock(&imp->imp_lock);
+        /* wait for all requests to error out and call completion callbacks.
+           Cap it at obd_timeout -- these should all have been locally
+           cancelled by ptlrpc_abort_inflight. */
+        lwi = LWI_TIMEOUT_INTERVAL(
+                cfs_timeout_cap(cfs_time_seconds(obd_timeout)),
+                cfs_time_seconds(1), NULL, NULL);
+        rc = l_wait_event(imp->imp_recovery_waitq,
+                          (atomic_read(&imp->imp_inflight) == 0), &lwi);
  
-        timeout = (int)(last - cfs_time_current_sec());
-        if (timeout > 0) {
-                lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(timeout),
-                                           cfs_time_seconds(1), NULL, NULL);
-                rc = l_wait_event(imp->imp_recovery_waitq,
-                                  (atomic_read(&imp->imp_inflight) == 0),
-                                  &lwi);
-        }
-
-        if (atomic_read(&imp->imp_inflight)) {
+        if (rc) {
                  CERROR("%s: rc = %d waiting for callback (%d != 0)\n",
                         obd2cli_tgt(imp->imp_obd), rc,
                         atomic_read(&imp->imp_inflight));
@@ -339,18 +326,20 @@ static int import_select_connection(struct obd_import *imp)
          LASSERT(imp_conn->oic_conn);
  
          /* If we've tried everything, and we're back to the beginning of the
-           list, wait for LND_TIMEOUT to give the queues a chance to drain. */
+           list, increase our timeout and try again. It will be reset when
+           we do finally connect. (FIXME: really we should wait for all network
+           state associated with the last connection attempt to drain before
+           trying to reconnect on it.) */
          if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
-                int must_wait;
+                if (at_get(&imp->imp_at.iat_net_latency) <
+                    CONNECTION_SWITCH_MAX) {
+                        at_add(&imp->imp_at.iat_net_latency,
+                               at_get(&imp->imp_at.iat_net_latency) +
+                               CONNECTION_SWITCH_INC);
+                }
                  LASSERT(imp_conn->oic_last_attempt);
-                must_wait = LND_TIMEOUT -
-                        (int)cfs_duration_sec(cfs_time_current_64() - 
-                                              imp_conn->oic_last_attempt);
-                imp->imp_at.iat_drain = max(0, must_wait);
-                CWARN("Tried all connections, %lus drain time\n",
-                      imp->imp_at.iat_drain);
-        } else {
-                imp->imp_at.iat_drain = 0;
+                CWARN("Tried all connections, increasing latency to %ds\n",
+                      at_get(&imp->imp_at.iat_net_latency));
          }
  
          imp_conn->oic_last_attempt = cfs_time_current_64();
@@ -568,7 +557,6 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
          ENTRY;
  
          spin_lock(&imp->imp_lock);
-        imp->imp_at.iat_drain = 0;
          if (imp->imp_state == LUSTRE_IMP_CLOSED) {
                  spin_unlock(&imp->imp_lock);
                  RETURN(0);
@@ -1174,10 +1162,6 @@ int at_add(struct adaptive_timeout *at, unsigned int val)
                  at->at_binstart += shift * binlimit;
          }
  
-        if ((at->at_flags & AT_FLG_MIN) && 
-            (at->at_current < adaptive_timeout_min))
-                at->at_current = adaptive_timeout_min;
-
          if (at->at_current > at->at_worst_ever) {
                  at->at_worst_ever = at->at_current;
                  at->at_worst_time = now;
diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c

index 6f49146..af74dc7 100644 (file)
--- a/lustre/ptlrpc/pinger.c
+++ b/lustre/ptlrpc/pinger.c
@@ -67,11 +67,12 @@ int ptlrpc_ping(struct obd_import *imp)
  void ptlrpc_update_next_ping(struct obd_import *imp)
  {
  #ifdef ENABLE_PINGER
-        int time = (imp->imp_state != LUSTRE_IMP_DISCON) ? PING_INTERVAL :
-                /* FIXME should this be limited to LND_TIMEOUT so we don't
-                   build up pings in LND output queues? */
-                max_t(int, CONNECTION_SWITCH_MIN, 
-                      at_get(&imp->imp_at.iat_net_latency));
+        int time = PING_INTERVAL;
+        if (imp->imp_state == LUSTRE_IMP_DISCON) {
+                int dtime = max_t(int, CONNECTION_SWITCH_MIN,
+                                  at_get(&imp->imp_at.iat_net_latency));
+                time = min(time, dtime);
+        }
          imp->imp_next_ping = cfs_time_shift(time);
  #endif /* ENABLE_PINGER */
  }
author	nathan <nathan>
	Mon, 24 Sep 2007 16:53:05 +0000 (16:53 +0000)
committer	nathan <nathan>
	Mon, 24 Sep 2007 16:53:05 +0000 (16:53 +0000)
lustre/include/lustre_import.h		patch | blob | history
lustre/include/obd_support.h		patch | blob | history
lustre/lov/lproc_lov.c		patch | blob | history
lustre/obdclass/class_obd.c		patch | blob | history
lustre/obdclass/genops.c		patch | blob | history
lustre/obdclass/linux/linux-sysctl.c		patch | blob | history
lustre/ptlrpc/client.c		patch | blob | history
lustre/ptlrpc/import.c		patch | blob | history
lustre/ptlrpc/pinger.c		patch | blob | history