LU-15246 ptlrpc: per-device adaptive timeout parameters

author Lei Feng <flei@whamcloud.com>

Thu, 18 Nov 2021 00:34:46 +0000 (08:34 +0800)

committer Oleg Drokin <green@whamcloud.com>

Wed, 25 Oct 2023 18:04:55 +0000 (18:04 +0000)
author Lei Feng <flei@whamcloud.com>
Thu, 18 Nov 2021 00:34:46 +0000 (08:34 +0800)
committer Oleg Drokin <green@whamcloud.com>
Wed, 25 Oct 2023 18:04:55 +0000 (18:04 +0000)
diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h

index df9d5f8..2209bc8 100644 (file)
--- a/lustre/include/lprocfs_status.h
+++ b/lustre/include/lprocfs_status.h
@@ -877,6 +877,31 @@ ssize_t lustre_attr_store(struct kobject *kobj, struct attribute *attr,
  
  extern const struct sysfs_ops lustre_sysfs_ops;
  
+#define LUSTRE_OBD_UINT_PARAM_ATTR(name) /* RW sysfs attr for obd_<name> */ \
+static ssize_t name##_show(struct kobject *kobj, struct attribute *attr,\
+                          char *buf)                                   \
+{      /* show: print obd->obd_<name> as an unsigned decimal */        \
+       int rc;                                                         \
+       struct obd_device *obd =                                        \
+               container_of(kobj, struct obd_device, obd_kset.kobj);   \
+       rc = snprintf(buf, PAGE_SIZE, "%u\n", obd->obd_##name); \
+       return rc;                                                      \
+}                                                                      \
+static ssize_t name##_store(struct kobject *kobj, struct attribute *attr,\
+                           const char *buffer, size_t count)           \
+{      /* store: parse base-10 input into obd->obd_<name> */           \
+       int rc;                                                         \
+       unsigned int val;                                               \
+       struct obd_device *obd =                                        \
+               container_of(kobj, struct obd_device, obd_kset.kobj);   \
+       rc = kstrtouint(buffer, 10, &val);                              \
+       if (rc)                                                         \
+               return rc;      /* propagate kstrtouint() error */      \
+       obd->obd_##name = val;  /* stored as-is, no range check */      \
+       return count;                                                   \
+}                                                                      \
+LUSTRE_RW_ATTR(name)
+
  /* lproc_ptlrpc.c */
  struct ptlrpc_request;
  extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h

index 33b5f0d..38776a2 100644 (file)
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -1808,7 +1808,6 @@ int ldlm_cli_inodebits_convert(struct ldlm_lock *lock,
  
  /** @} ldlm_cli_api */
  
-extern unsigned int ldlm_enqueue_min;
  
  /* mds/handler.c */
  /* This has to be here because recursive inclusion sucks. */
diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h

index a4df13e..b19a933 100644 (file)
--- a/lustre/include/lustre_import.h
+++ b/lustre/include/lustre_import.h
@@ -90,9 +90,9 @@ struct ptlrpc_at_array {
  
  #define IMP_AT_MAX_PORTALS 8
  struct imp_at {
-        int                     iat_portal[IMP_AT_MAX_PORTALS];
-        struct adaptive_timeout iat_net_latency;
-        struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
+       int                     iat_portal[IMP_AT_MAX_PORTALS];
+       struct adaptive_timeout iat_net_latency;
+       struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
  };
  
  
@@ -252,7 +252,7 @@ struct obd_import {
         int                       imp_initiated_at;
          /** Incremented every time we send reconnection request */
          __u32                     imp_conn_cnt;
-       /** 
+       /**
          * \see ptlrpc_free_committed remembers imp_generation value here
          * after a check to save on unnecessary replay list iterations
          */
@@ -409,17 +409,9 @@ static inline void at_reinit(struct adaptive_timeout *at, timeout_t timeout,
         spin_unlock(&at->at_lock);
  }
  
-extern unsigned int at_min;
-extern unsigned int at_max;
-#define AT_OFF (at_max == 0)
+timeout_t obd_at_measure(struct obd_device *obd, struct adaptive_timeout *at,
+                           timeout_t timeout);
  
-static inline timeout_t at_get(struct adaptive_timeout *at)
-{
-       return (at->at_current_timeout > at_min) ?
-               at->at_current_timeout : at_min;
-}
-
-timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout);
  int import_at_get_index(struct obd_import *imp, int portal);
  
  /* genops.c */
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h

index badba82..acf8b2a 100644 (file)
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -2585,14 +2585,7 @@ static inline int ptlrpc_no_resend(struct ptlrpc_request *req)
         return req->rq_no_resend;
  }
  
-static inline int
-ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt)
-{
-       int at = AT_OFF ? 0 : at_get(&svcpt->scp_at_estimate);
-
-       return svcpt->scp_service->srv_watchdog_factor *
-              max_t(int, at, obd_timeout);
-}
+int ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt);
  
  static inline struct ptlrpc_service *
  ptlrpc_req2svc(struct ptlrpc_request *req)
diff --git a/lustre/include/obd.h b/lustre/include/obd.h

index 8b41f8b..d1759c6 100644 (file)
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -750,8 +750,47 @@ struct obd_device {
         struct kset                     obd_kset; /* sysfs object collection */
         struct kobj_type                obd_ktype;
         struct completion               obd_kobj_unregister;
+
+       /* per-device adaptive timeout parameters; 0 = use global value */
+       unsigned int                    obd_at_min;
+       unsigned int                    obd_at_max;
+       unsigned int                    obd_at_history;
+       unsigned int                    obd_ldlm_enqueue_min;
  };
  
+/*
+ * Per-device adaptive timeout (AT) tunables.  A per-device value of 0, or a
+ * NULL \a obd, selects the corresponding global tunable instead.
+ */
+#define obd_at_param(obd, field, global) ({ \
+       struct obd_device *_at_obd = (obd); \
+       if (_at_obd == NULL) \
+               CDEBUG(D_RPCTRACE, "NULL obd\n"); \
+       _at_obd && _at_obd->field ? _at_obd->field : (global); \
+})
+
+#define obd_get_at_min(obd) \
+       obd_at_param(obd, obd_at_min, at_min)
+#define obd_get_at_max(obd) \
+       obd_at_param(obd, obd_at_max, at_max)
+#define obd_get_at_history(obd) \
+       obd_at_param(obd, obd_at_history, at_history)
+extern unsigned int ldlm_enqueue_min;
+#define obd_get_ldlm_enqueue_min(obd) \
+       obd_at_param(obd, obd_ldlm_enqueue_min, ldlm_enqueue_min)
+
+/* AT is off for \a obd when its effective at_max is 0. */
+#define obd_at_off(obd) (obd_get_at_max(obd) == 0)
+
+/*
+ * Current estimate of \a at, clamped from below by the effective at_min of
+ * \a obd.
+ */
+#define obd_at_get(obd, at) ({ \
+       timeout_t _at_floor = obd_get_at_min(obd); \
+       max_t(timeout_t, (at)->at_current_timeout, _at_floor); \
+})
+
  int obd_uuid_add(struct obd_device *obd, struct obd_export *export);
  void obd_uuid_del(struct obd_device *obd, struct obd_export *export);
  #ifdef HAVE_SERVER_SUPPORT
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index 6069fa3..07d9332 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -2029,8 +2029,8 @@ check_and_start_recovery_timer(struct obd_device *obd,
                  * Teach server about old server's estimates, as first guess
                  * at how long new requests will take.
                  */
-               at_measured(&req->rq_rqbd->rqbd_svcpt->scp_at_estimate,
-                           service_timeout);
+               obd_at_measure(obd, &req->rq_rqbd->rqbd_svcpt->scp_at_estimate,
+                              service_timeout);
  
         target_start_recovery_timer(obd);
  
@@ -2424,12 +2424,13 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
         /* don't reset timer for final stage */
         if (!exp_finished(req->rq_export)) {
                 timeout_t timeout = obd_timeout;
+               struct obd_device *obd = req->rq_export->exp_obd;
  
                 /**
                  * Add request @timeout to the recovery time so next request from
                  * this client may come in recovery time
                  */
-               if (!AT_OFF) {
+               if (!obd_at_off(obd)) {
                         struct ptlrpc_service_part *svcpt;
                         timeout_t est_timeout;
  
@@ -2441,7 +2442,7 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
                          * use the maxium timeout here for waiting the client
                          * sending the next req
                          */
-                       est_timeout = at_get(&svcpt->scp_at_estimate);
+                       est_timeout = obd_at_get(obd, &svcpt->scp_at_estimate);
                         timeout = max_t(timeout_t, at_est2timeout(est_timeout),
                                         lustre_msg_get_timeout(req->rq_reqmsg));
                         /*
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c

index d043760..2f19e7e 100644 (file)
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -438,7 +438,8 @@ static void ldlm_add_blocked_lock(struct ldlm_lock *lock)
  
  static int ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
  {
-       int ret;
+       struct obd_device *obd = NULL;
+       int at_off, ret;
  
         /* NB: must be called with hold of lock_res_and_lock() */
         LASSERT(ldlm_is_res_locked(lock));
@@ -448,9 +449,12 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
          * Do not put cross-MDT lock in the waiting list, since we
          * will not evict it due to timeout for now
          */
-       if (lock->l_export != NULL &&
-           (exp_connect_flags(lock->l_export) & OBD_CONNECT_MDS_MDS))
-               return 0;
+       if (lock->l_export != NULL) {
+               obd = lock->l_export->exp_obd;
+
+               if (exp_connect_flags(lock->l_export) & OBD_CONNECT_MDS_MDS)
+                       return 0;
+       }
  
         spin_lock_bh(&waiting_locks_spinlock);
         if (ldlm_is_cancel(lock)) {
@@ -484,9 +488,10 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
         if (ret)
                 ldlm_add_blocked_lock(lock);
  
+       at_off = obd_at_off(obd);
         LDLM_DEBUG(lock, "%sadding to wait list(timeout: %d, AT: %s)",
                    ret == 0 ? "not re-" : "", timeout,
-                  AT_OFF ? "off" : "on");
+                  at_off ? "off" : "on");
         return ret;
  }
  
@@ -631,8 +636,9 @@ int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
  timeout_t ldlm_bl_timeout(struct ldlm_lock *lock)
  {
         timeout_t timeout;
+       struct obd_device *obd = lock->l_export->exp_obd;
  
-       if (AT_OFF)
+       if (obd_at_off(obd))
                 return obd_timeout / 2;
  
         /*
@@ -641,9 +647,9 @@ timeout_t ldlm_bl_timeout(struct ldlm_lock *lock)
          * It would be nice to have some kind of "early reply" mechanism for
          * lock callbacks too...
          */
-       timeout = at_get(&lock->l_export->exp_bl_lock_at);
+       timeout = obd_at_get(obd, &lock->l_export->exp_bl_lock_at);
         return max_t(timeout_t, timeout + (timeout >> 1),
-                    (timeout_t)ldlm_enqueue_min);
+                    (timeout_t)obd_get_ldlm_enqueue_min(obd));
  }
  EXPORT_SYMBOL(ldlm_bl_timeout);
  
@@ -667,8 +673,9 @@ timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req)
  {
         struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
         timeout_t timeout, req_timeout, at_timeout, netl;
+       struct obd_device *obd = req->rq_export->exp_obd;
  
-       if (AT_OFF)
+       if (obd_at_off(obd))
                 return obd_timeout / 2;
  
         /* A blocked lock means somebody in the cluster is waiting, and we
@@ -681,9 +688,11 @@ timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req)
          * Either this on the next RPC times out, take the max.
          * Considering the current RPC, take just the left time.
          */
-       netl = at_get(&req->rq_export->exp_imp_reverse->imp_at.iat_net_latency);
+       netl = obd_at_get(obd,
+                         &req->rq_export->exp_imp_reverse->imp_at.iat_net_latency);
         req_timeout = req->rq_deadline - ktime_get_real_seconds() + netl;
-       at_timeout = at_est2timeout(at_get(&svcpt->scp_at_estimate)) + netl;
+       at_timeout = at_est2timeout(obd_at_get(obd, &svcpt->scp_at_estimate))
+                                   + netl;
         req_timeout = max(req_timeout, at_timeout);
  
         /* Take 1 re-connect failure and 1 re-connect success into account. */
@@ -691,7 +700,8 @@ timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req)
  
         /* Client's timeout is calculated as at_est2timeout(), let's be a bit
          * more conservative than client */
-       return max(timeout + (timeout >> 4), (timeout_t)ldlm_enqueue_min);
+       return max(timeout + (timeout >> 4),
+                  (timeout_t)obd_get_ldlm_enqueue_min(obd));
  }
  EXPORT_SYMBOL(ldlm_bl_timeout_by_rpc);
  
@@ -936,6 +946,7 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         struct ptlrpc_request  *req;
         int instant_cancel = 0;
         int rc = 0;
+       struct obd_device *obd;
  
         ENTRY;
  
@@ -950,7 +961,9 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
  
         LASSERT(lock);
         LASSERT(data != NULL);
-       if (lock->l_export->exp_obd->obd_recovering != 0)
+
+       obd = lock->l_export->exp_obd;
+       if (obd->obd_recovering != 0)
                 LDLM_ERROR(lock, "BUG 6063: lock collide during recovery");
  
         ldlm_lock_reorder_req(lock);
@@ -1019,7 +1032,7 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
  
         req->rq_send_state = LUSTRE_IMP_FULL;
         /* ptlrpc_request_alloc_pack already set timeout */
-       if (AT_OFF)
+       if (obd_at_off(obd))
                 req->rq_timeout = ldlm_get_rq_timeout();
  
         if (lock->l_export && lock->l_export->exp_nid_stats &&
@@ -1048,6 +1061,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
         int instant_cancel = 0;
         int rc = 0;
         int lvb_len;
+       struct obd_device *obd;
  
         ENTRY;
  
@@ -1059,6 +1073,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
                 RETURN(0);
         }
  
+       obd = lock->l_export->exp_obd;
         req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                    &RQF_LDLM_CP_CALLBACK);
         if (req == NULL)
@@ -1120,7 +1135,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
  
         req->rq_send_state = LUSTRE_IMP_FULL;
         /* ptlrpc_request_pack already set timeout */
-       if (AT_OFF)
+       if (obd_at_off(obd))
                 req->rq_timeout = ldlm_get_rq_timeout();
  
         /* We only send real blocking ASTs after the lock is granted */
@@ -1182,6 +1197,7 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         struct ldlm_cb_async_args *ca;
         int rc;
         struct req_format *req_fmt;
+       struct obd_device *obd = lock->l_export->exp_obd;
  
         ENTRY;
  
@@ -1223,7 +1239,7 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
  
         req->rq_send_state = LUSTRE_IMP_FULL;
         /* ptlrpc_request_alloc_pack already set timeout */
-       if (AT_OFF)
+       if (obd_at_off(obd))
                 req->rq_timeout = ldlm_get_rq_timeout();
  
         req->rq_interpret_reply = ldlm_cb_interpret;
@@ -1815,7 +1831,9 @@ int ldlm_request_cancel(struct ptlrpc_request *req,
                         LDLM_DEBUG(lock,
                                    "server cancels blocked lock after %ds",
                                    delay);
-                       at_measured(&lock->l_export->exp_bl_lock_at, delay);
+                       obd_at_measure(lock->l_export->exp_obd,
+                                      &lock->l_export->exp_bl_lock_at,
+                                      delay);
                 }
                 ldlm_lock_cancel(lock);
                 LDLM_LOCK_PUT(lock);
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c

index 46ada0a..6fea124 100644 (file)
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -174,8 +174,11 @@ EXPORT_SYMBOL(is_granted_or_cancelled_nolock);
  static timeout_t ldlm_cp_timeout(struct ldlm_lock *lock)
  {
         timeout_t timeout;
+       struct obd_device *obd;
+
+       obd = class_exp2obd(lock->l_conn_export);
  
-       if (AT_OFF)
+       if (obd_at_off(obd))
                 return obd_timeout;
  
         /*
@@ -183,8 +186,8 @@ static timeout_t ldlm_cp_timeout(struct ldlm_lock *lock)
          * lock from another client.  Server will evict the other client if it
          * doesn't respond reasonably, and then give us the lock.
          */
-       timeout = at_get(ldlm_lock_to_ns_at(lock));
-       return max(3 * timeout, (timeout_t)ldlm_enqueue_min);
+       timeout = obd_at_get(obd, ldlm_lock_to_ns_at(lock));
+       return max(3 * timeout, (timeout_t)obd_get_ldlm_enqueue_min(obd));
  }
  
  /**
@@ -201,6 +204,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock, void *data)
         } else if (data == NULL) {
                 LDLM_DEBUG(lock, "client-side enqueue: granted");
         } else {
+               struct obd_device *obd = class_exp2obd(lock->l_conn_export);
                 /* Take into AT only CP RPC, not immediately granted locks */
                 timeout_t delay = 0;
  
@@ -213,7 +217,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock, void *data)
                 LDLM_DEBUG(lock, "client-side enqueue: granted after %ds",
                            delay);
                 /* Update our time estimate */
-               at_measured(ldlm_lock_to_ns_at(lock), delay);
+               obd_at_measure(obd, ldlm_lock_to_ns_at(lock), delay);
         }
         return result;
  }
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c

index e41ef8a..30cde96 100644 (file)
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -940,7 +940,7 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
         for (idx = 0; idx < (1 << ns->ns_bucket_bits); idx++) {
                 struct ldlm_ns_bucket *nsb = &ns->ns_rs_buckets[idx];
  
-               at_init(&nsb->nsb_at_estimate, ldlm_enqueue_min, 0);
+               at_init(&nsb->nsb_at_estimate, obd_get_ldlm_enqueue_min(obd), 0);
                 nsb->nsb_namespace = ns;
                 nsb->nsb_reclaim_start = 0;
                 atomic_set(&nsb->nsb_count, 0);
diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c

index d813b2b..239c859 100644 (file)
--- a/lustre/mdc/lproc_mdc.c
+++ b/lustre/mdc/lproc_mdc.c
@@ -789,6 +789,10 @@ static ssize_t grant_shrink_interval_store(struct kobject *kobj,
  }
  LUSTRE_RW_ATTR(grant_shrink_interval);
  
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
  static struct attribute *mdc_attrs[] = {
         &lustre_attr_active.attr,
         &lustre_attr_checksums.attr,
@@ -802,6 +806,9 @@ static struct attribute *mdc_attrs[] = {
         &lustre_attr_grant_shrink_interval.attr,
         &lustre_attr_cur_lost_grant_bytes.attr,
         &lustre_attr_cur_dirty_grant_bytes.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_history.attr,
         NULL,
  };
  
diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c

index 293420f..deb35aa 100644 (file)
--- a/lustre/mdt/mdt_handler.c
+++ b/lustre/mdt/mdt_handler.c
@@ -493,15 +493,16 @@ static int mdt_statfs(struct tgt_session_info *tsi)
         ktime_t kstart = ktime_get();
         int current_blockbits;
         int rc;
+       timeout_t at_est;
  
         ENTRY;
  
         svcpt = req->rq_rqbd->rqbd_svcpt;
  
         /* This will trigger a watchdog timeout */
+       at_est = obd_at_get(mdt->mdt_lu_dev.ld_obd, &svcpt->scp_at_estimate);
         CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP,
-                        (MDT_SERVICE_WATCHDOG_FACTOR *
-                         at_get(&svcpt->scp_at_estimate)) + 1);
+                        (MDT_SERVICE_WATCHDOG_FACTOR * at_est) + 1);
  
         rc = mdt_check_ucred(info);
         if (rc)
diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c

index 61eabd6..39450f3 100644 (file)
--- a/lustre/mdt/mdt_lproc.c
+++ b/lustre/mdt/mdt_lproc.c
@@ -1651,6 +1651,11 @@ LUSTRE_RO_ATTR(num_exports);
  LUSTRE_RW_ATTR(grant_check_threshold);
  LUSTRE_RO_ATTR(eviction_count);
  
+/* per-device adaptive timeout (AT) parameters */
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
  static struct attribute *mdt_attrs[] = {
         &lustre_attr_tot_dirty.attr,
         &lustre_attr_tot_granted.attr,
@@ -1697,6 +1702,9 @@ static struct attribute *mdt_attrs[] = {
         &lustre_attr_enable_remote_subdir_mount.attr,
         &lustre_attr_max_mod_rpcs_in_flight.attr,
         &lustre_attr_enable_dmv_implicit_inherit.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_history.attr,
         NULL,
  };
  
diff --git a/lustre/mgc/lproc_mgc.c b/lustre/mgc/lproc_mgc.c

index b965186..5f1dbbf 100644 (file)
--- a/lustre/mgc/lproc_mgc.c
+++ b/lustre/mgc/lproc_mgc.c
@@ -108,11 +108,18 @@ static ssize_t dynamic_nids_store(struct kobject *kobj, struct attribute *attr,
  
  LUSTRE_RW_ATTR(dynamic_nids);
  
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
  static struct attribute *mgc_attrs[] = {
         &lustre_attr_mgs_conn_uuid.attr,
         &lustre_attr_conn_uuid.attr,
         &lustre_attr_ping.attr,
         &lustre_attr_dynamic_nids.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_history.attr,
         NULL,
  };
  
diff --git a/lustre/mgc/mgc_internal.h b/lustre/mgc/mgc_internal.h

index 291c78d..022320c 100644 (file)
--- a/lustre/mgc/mgc_internal.h
+++ b/lustre/mgc/mgc_internal.h
@@ -105,8 +105,9 @@ int mgc_barrier_glimpse_ast(struct ldlm_lock *lock, void *data);
  /* This is the timeout value for MGS_CONNECT request plus a ping interval, such
   * that we can have a chance to try the secondary MGS if any.
   */
-#define  MGC_ENQUEUE_LIMIT (INITIAL_CONNECT_TIMEOUT + (AT_OFF ? 0 : at_min) \
-                               + PING_INTERVAL)
+#define  MGC_ENQUEUE_LIMIT(obd) (INITIAL_CONNECT_TIMEOUT + \
+                                (obd_at_off(obd) ? 0 : obd_get_at_min(obd)) + \
+                                PING_INTERVAL)
  #define  MGC_TARGET_REG_LIMIT 10
  #define  MGC_TARGET_REG_LIMIT_MAX RECONNECT_DELAY_MAX
  #define  MGC_SEND_PARAM_LIMIT 10
diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c

index 134a40e..2efb6c1 100644 (file)
--- a/lustre/mgc/mgc_request.c
+++ b/lustre/mgc/mgc_request.c
@@ -992,9 +992,9 @@ static int mgc_enqueue(struct obd_export *exp, enum ldlm_type type,
             IS_SERVER(s2lsi(cld->cld_cfg.cfg_sb)))
                 short_limit = 1;
  
-        /* Limit how long we will wait for the enqueue to complete */
-        req->rq_delay_limit = short_limit ? 5 : MGC_ENQUEUE_LIMIT;
-        rc = ldlm_cli_enqueue(exp, &req, &einfo, &cld->cld_resid, NULL, flags,
+       /* Limit how long we will wait for the enqueue to complete */
+       req->rq_delay_limit = short_limit ? 5 : MGC_ENQUEUE_LIMIT(exp->exp_obd);
+       rc = ldlm_cli_enqueue(exp, &req, &einfo, &cld->cld_resid, NULL, flags,
                               NULL, 0, LVB_T_NONE, lockh, 0);
          /* A failed enqueue should still call the mgc_blocking_ast,
             where it will be requeued if needed ("grant failed"). */
diff --git a/lustre/mgs/lproc_mgs.c b/lustre/mgs/lproc_mgs.c

index b36cef0..3d58463 100644 (file)
--- a/lustre/mgs/lproc_mgs.c
+++ b/lustre/mgs/lproc_mgs.c
@@ -250,11 +250,18 @@ static ssize_t mntdev_show(struct kobject *kobj, struct attribute *attr,
  }
  LUSTRE_RO_ATTR(mntdev);
  
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
  static struct attribute *mgs_attrs[] = {
         &lustre_attr_fstype.attr,
         &lustre_attr_mntdev.attr,
         &lustre_attr_eviction_count.attr,
         &lustre_attr_num_exports.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_history.attr,
         NULL,
  };
  
diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c

index 8a1165e..f7270f8 100644 (file)
--- a/lustre/obdclass/class_obd.c
+++ b/lustre/obdclass/class_obd.c
@@ -89,6 +89,7 @@ EXPORT_SYMBOL(ldlm_timeout_set);
  /* bulk transfer timeout, give up after 100s by default */
  unsigned int bulk_timeout = 100; /* seconds */
  EXPORT_SYMBOL(bulk_timeout);
+
  /* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */
  unsigned int at_min = 5;
  EXPORT_SYMBOL(at_min);
diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c

index 4f09a04..730a919 100644 (file)
--- a/lustre/obdclass/lprocfs_status.c
+++ b/lustre/obdclass/lprocfs_status.c
@@ -875,13 +875,14 @@ static void lprocfs_import_seq_show_locked(struct seq_file *m,
                 if (imp->imp_at.iat_portal[j] == 0)
                         break;
                 k = max_t(unsigned int, k,
-                         at_get(&imp->imp_at.iat_service_estimate[j]));
+                         obd_at_get(imp->imp_obd,
+                                    &imp->imp_at.iat_service_estimate[j]));
         }
         seq_printf(m, "    service_estimates:\n"
                    "       services: %u sec\n"
                    "       network: %d sec\n",
                    k,
-                  at_get(&imp->imp_at.iat_net_latency));
+                  obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency));
  
         seq_printf(m, "    transactions:\n"
                    "       last_replay: %llu\n"
@@ -987,7 +988,7 @@ static void lprocfs_timeouts_seq_show_locked(struct seq_file *m,
                    "last reply", (s64)imp->imp_last_reply_time,
                    (s64)(now - imp->imp_last_reply_time));
  
-       cur_timeout = at_get(&imp->imp_at.iat_net_latency);
+       cur_timeout = obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency);
         worst_timeout = imp->imp_at.iat_net_latency.at_worst_timeout_ever;
         worst_timestamp = imp->imp_at.iat_net_latency.at_worst_timestamp;
         seq_printf(m, "%-10s : cur %3u  worst %3u (at %lld, %llds ago) ",
@@ -1002,7 +1003,7 @@ static void lprocfs_timeouts_seq_show_locked(struct seq_file *m,
                         break;
  
                 service_est = &imp->imp_at.iat_service_estimate[i];
-               cur_timeout = at_get(service_est);
+               cur_timeout = obd_at_get(imp->imp_obd, service_est);
                 worst_timeout = service_est->at_worst_timeout_ever;
                 worst_timestamp = service_est->at_worst_timestamp;
                 seq_printf(m, "portal %-2d  : cur %3u  worst %3u (at %lld, %llds ago) ",
diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c

index 8ddb93f..79091c4 100644 (file)
--- a/lustre/ofd/lproc_ofd.c
+++ b/lustre/ofd/lproc_ofd.c
@@ -988,6 +988,10 @@ void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset,
  
  LPROC_SEQ_FOPS(lprocfs_nid_stats_clear);
  
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
  static struct attribute *ofd_attrs[] = {
         &lustre_attr_tot_dirty.attr,
         &lustre_attr_tot_granted.attr,
@@ -1017,6 +1021,9 @@ static struct attribute *ofd_attrs[] = {
         &lustre_attr_access_log_size.attr,
         &lustre_attr_job_cleanup_interval.attr,
         &lustre_attr_checksum_t10pi_enforce.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_history.attr,
         NULL,
  };
  
diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c

index f7f8e3a..21e27f6 100644 (file)
--- a/lustre/osc/lproc_osc.c
+++ b/lustre/osc/lproc_osc.c
@@ -862,6 +862,11 @@ static int lprocfs_osc_attach_seqstat(struct obd_device *obd)
  }
  #endif /* CONFIG_PROC_FS */
  
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+LUSTRE_OBD_UINT_PARAM_ATTR(ldlm_enqueue_min);
+
  static struct attribute *osc_attrs[] = {
         &lustre_attr_active.attr,
         &lustre_attr_checksums.attr,
@@ -881,6 +886,10 @@ static struct attribute *osc_attrs[] = {
         &lustre_attr_idle_timeout.attr,
         &lustre_attr_idle_connect.attr,
         &lustre_attr_grant_shrink.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_history.attr,
+       &lustre_attr_ldlm_enqueue_min.attr,
         NULL,
  };
  
diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c

index b73b0e0..a40e0b4 100644 (file)
--- a/lustre/osc/osc_cache.c
+++ b/lustre/osc/osc_cache.c
@@ -1554,6 +1554,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
         int rc = -EDQUOT;
         int remain;
         bool entered = false;
+       struct obd_device *obd = cli->cl_import->imp_obd;
         /* We cannot wait for a long time here since we are holding ldlm lock
          * across the actual IO. If no requests complete fast (e.g. due to
          * overloaded OST that takes a long time to process everything, we'd
@@ -1562,8 +1563,10 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
          * evicted by server which is half obd_timeout when AT is off
          * or at least ldlm_enqueue_min with AT on.
          * See LU-13131 */
-       unsigned long timeout = cfs_time_seconds(AT_OFF ? obd_timeout / 2 :
-                                                         ldlm_enqueue_min / 2);
+       unsigned long timeout =
+               cfs_time_seconds(obd_at_off(obd) ?
+                                obd_timeout / 2 :
+                                obd_get_ldlm_enqueue_min(obd) / 2);
  
         ENTRY;
  
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index 57e012c..f0e1546 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -314,9 +314,12 @@ EXPORT_SYMBOL(ptlrpc_free_bulk);
   */
  void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
  {
+       struct obd_device *obd;
+
         LASSERT(req->rq_import);
+       obd = req->rq_import->imp_obd;
  
-       if (AT_OFF) {
+       if (obd_at_off(obd)) {
                 /* non-AT settings */
                 /**
                  * \a imp_server_timeout means this is reverse import and
@@ -334,7 +337,7 @@ void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
  
                 idx = import_at_get_index(req->rq_import,
                                           req->rq_request_portal);
-               serv_est = at_get(&at->iat_service_estimate[idx]);
+               serv_est = obd_at_get(obd, &at->iat_service_estimate[idx]);
                 /*
                  * Currently a 32 bit value is sent over the
                  * wire for rq_timeout so please don't change this
@@ -361,8 +364,10 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
         int idx;
         timeout_t oldse;
         struct imp_at *at;
+       struct obd_device *obd;
  
         LASSERT(req->rq_import);
+       obd = req->rq_import->imp_obd;
         at = &req->rq_import->imp_at;
  
         idx = import_at_get_index(req->rq_import, req->rq_request_portal);
@@ -370,19 +375,28 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
          * max service estimates are tracked on the server side,
          * so just keep minimal history here
          */
-       oldse = at_measured(&at->iat_service_estimate[idx], serv_est);
-       if (oldse != 0)
+       oldse = obd_at_measure(obd, &at->iat_service_estimate[idx], serv_est);
+       if (oldse != 0) {
+               unsigned int at_est = obd_at_get(obd,
+                                               &at->iat_service_estimate[idx]);
                 CDEBUG(D_ADAPTTO,
                        "The RPC service estimate for %s ptl %d has changed from %d to %d\n",
                        req->rq_import->imp_obd->obd_name,
                        req->rq_request_portal,
-                      oldse, at_get(&at->iat_service_estimate[idx]));
+                      oldse, at_est);
+       }
  }
  
  /* Expected network latency per remote node (secs) */
  int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
  {
-       return AT_OFF ? 0 : at_get(&req->rq_import->imp_at.iat_net_latency);
+       struct obd_device *obd = NULL;
+
+       if (req->rq_import)
+               obd = req->rq_import->imp_obd;
+
+       return obd_at_off(obd) ?
+              0 : obd_at_get(obd, &req->rq_import->imp_at.iat_net_latency);
  }
  
  /* Adjust expected network latency */
@@ -393,8 +407,10 @@ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
         struct imp_at *at;
         timeout_t oldnl;
         timeout_t nl;
+       struct obd_device *obd;
  
         LASSERT(req->rq_import);
+       obd = req->rq_import->imp_obd;
  
         if (service_timeout > now - req->rq_sent + 3) {
                 /*
@@ -418,13 +434,16 @@ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
         nl = max_t(timeout_t, now - req->rq_sent - service_timeout, 0) + 1;
         at = &req->rq_import->imp_at;
  
-       oldnl = at_measured(&at->iat_net_latency, nl);
-       if (oldnl != 0)
+       oldnl = obd_at_measure(obd, &at->iat_net_latency, nl);
+       if (oldnl != 0) {
+               timeout_t timeout = obd_at_get(obd, &at->iat_net_latency);
+
                 CDEBUG(D_ADAPTTO,
                        "The network latency for %s (nid %s) has changed from %d to %d\n",
                        req->rq_import->imp_obd->obd_name,
                        obd_uuid2str(&req->rq_import->imp_connection->c_remote_uuid),
-                      oldnl, at_get(&at->iat_net_latency));
+                      oldnl, timeout);
+       }
  }
  
  static int unpack_reply(struct ptlrpc_request *req)
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c

index 1145862..f63eaf3 100644 (file)
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -569,16 +569,18 @@ static int import_select_connection(struct obd_import *imp)
          */
         if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
                 struct adaptive_timeout *at = &imp->imp_at.iat_net_latency;
+               struct obd_device *obd = imp->imp_obd;
  
-               if (at_get(at) < CONNECTION_SWITCH_MAX) {
-                       at_measured(at, at_get(at) + CONNECTION_SWITCH_INC);
-                       if (at_get(at) > CONNECTION_SWITCH_MAX)
+               if (obd_at_get(obd, at) < CONNECTION_SWITCH_MAX) {
+                       obd_at_measure(obd, at, obd_at_get(obd, at) +
+                                               CONNECTION_SWITCH_INC);
+                       if (obd_at_get(obd, at) > CONNECTION_SWITCH_MAX)
                                 at_reset(at, CONNECTION_SWITCH_MAX);
                 }
                 LASSERT(imp_conn->oic_last_attempt);
                 CDEBUG(D_HA,
                        "%s: tried all connections, increasing latency to %ds\n",
-                      imp->imp_obd->obd_name, at_get(at));
+                      imp->imp_obd->obd_name, obd_at_get(obd, at));
         }
  
         imp_conn->oic_last_attempt = ktime_get_seconds();
@@ -1497,7 +1499,7 @@ static int signal_completed_replay(struct obd_import *imp)
         req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
         lustre_msg_add_flags(req->rq_reqmsg,
                              MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE);
-       if (AT_OFF)
+       if (obd_at_off(imp->imp_obd))
                 req->rq_timeout *= 3;
         req->rq_interpret_reply = completed_replay_interpret;
  
@@ -1731,7 +1733,7 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
                 long timeout_jiffies;
                 time64_t timeout;
  
-               if (AT_OFF) {
+               if (obd_at_off(imp->imp_obd)) {
                         if (imp->imp_server_timeout)
                                 timeout = obd_timeout >> 1;
                         else
@@ -1742,7 +1744,8 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
  
                         req_portal = imp->imp_client->cli_request_portal;
                         idx = import_at_get_index(imp, req_portal);
-                       timeout = at_get(&imp->imp_at.iat_service_estimate[idx]);
+                       timeout = obd_at_get(imp->imp_obd,
+                                       &imp->imp_at.iat_service_estimate[idx]);
                 }
  
                 timeout_jiffies = cfs_time_seconds(timeout);
@@ -1935,57 +1938,60 @@ void ptlrpc_cleanup_imp(struct obd_import *imp)
   *    but still smoothing out a return to normalcy from a slow response.
   *  - (E.g. remember the maximum latency in each minute of the last 4 minutes.)
   */
-timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout)
+timeout_t obd_at_measure(struct obd_device *obd, struct adaptive_timeout *at,
+                           timeout_t timeout)
  {
+       unsigned int l_at_min = obd_get_at_min(obd);
+       unsigned int l_at_max = obd_get_at_max(obd);
         timeout_t old_timeout = at->at_current_timeout;
         time64_t now = ktime_get_real_seconds();
-       long binlimit = max_t(long, at_history / AT_BINS, 1);
+       long binlimit = max_t(long, obd_get_at_history(obd) / AT_BINS, 1);
  
-        LASSERT(at);
+       LASSERT(at);
         CDEBUG(D_OTHER, "add %u to %p time=%lld v=%u (%u %u %u %u)\n",
                timeout, at, now - at->at_binstart, at->at_current_timeout,
-               at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
+              at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
  
         if (timeout <= 0)
                 /* Negative timeouts and 0's don't count, because we never
                  * want our timeout to drop to 0 or below, and because 0 could
                  * mean an error
                  */
-                return 0;
+               return 0;
  
         spin_lock(&at->at_lock);
  
-        if (unlikely(at->at_binstart == 0)) {
-                /* Special case to remove default from history */
+       if (unlikely(at->at_binstart == 0)) {
+               /* Special case to remove default from history */
                 at->at_current_timeout = timeout;
                 at->at_worst_timeout_ever = timeout;
                 at->at_worst_timestamp = now;
                 at->at_hist[0] = timeout;
-                at->at_binstart = now;
-        } else if (now - at->at_binstart < binlimit ) {
-                /* in bin 0 */
+               at->at_binstart = now;
+       } else if (now - at->at_binstart < binlimit) {
+               /* in bin 0 */
                 at->at_hist[0] = max_t(timeout_t, timeout, at->at_hist[0]);
                 at->at_current_timeout = max_t(timeout_t, timeout,
                                                at->at_current_timeout);
          } else {
-                int i, shift;
+               int i, shift;
                 timeout_t maxv = timeout;
  
                 /* move bins over */
                 shift = (u32)(now - at->at_binstart) / binlimit;
-                LASSERT(shift > 0);
-                for(i = AT_BINS - 1; i >= 0; i--) {
-                        if (i >= shift) {
-                                at->at_hist[i] = at->at_hist[i - shift];
+               LASSERT(shift > 0);
+               for (i = AT_BINS - 1; i >= 0; i--) {
+                       if (i >= shift) {
+                               at->at_hist[i] = at->at_hist[i - shift];
                                 maxv = max_t(timeout_t, maxv, at->at_hist[i]);
-                        } else {
-                                at->at_hist[i] = 0;
-                        }
-                }
+                       } else {
+                               at->at_hist[i] = 0;
+                       }
+               }
                 at->at_hist[0] = timeout;
                 at->at_current_timeout = maxv;
-                at->at_binstart += shift * binlimit;
-        }
+               at->at_binstart += shift * binlimit;
+       }
  
         if (at->at_current_timeout > at->at_worst_timeout_ever) {
                 at->at_worst_timeout_ever = at->at_current_timeout;
@@ -1993,23 +1999,24 @@ timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout)
         }
  
         if (at->at_flags & AT_FLG_NOHIST)
-                /* Only keep last reported val; keeping the rest of the history
+               /* Only keep last reported val; keeping the rest of the history
                  * for debugfs only
                  */
                 at->at_current_timeout = timeout;
  
-        if (at_max > 0)
+       if (l_at_max > 0)
                 at->at_current_timeout = min_t(timeout_t,
-                                              at->at_current_timeout, at_max);
+                                              at->at_current_timeout,
+                                              l_at_max);
         at->at_current_timeout = max_t(timeout_t, at->at_current_timeout,
-                                      at_min);
+                                      l_at_min);
         if (at->at_current_timeout != old_timeout)
                 CDEBUG(D_OTHER,
                        "AT %p change: old=%u new=%u delta=%d (val=%d) hist %u %u %u %u\n",
                        at, old_timeout, at->at_current_timeout,
                        at->at_current_timeout - old_timeout, timeout,
-                       at->at_hist[0], at->at_hist[1], at->at_hist[2],
-                       at->at_hist[3]);
+                      at->at_hist[0], at->at_hist[1], at->at_hist[2],
+                      at->at_hist[3]);
  
         /* if we changed, report the old timeout value */
         old_timeout = (at->at_current_timeout != old_timeout) ? old_timeout : 0;
diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c

index d34bcc3..7503271 100644 (file)
--- a/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/lustre/ptlrpc/lproc_ptlrpc.c
@@ -1116,14 +1116,14 @@ static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n)
         timeout_t worst_timeout;
         int i;
  
-       if (AT_OFF) {
+       if (obd_at_off(NULL)) {
                 seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n",
                            obd_timeout);
                 return 0;
         }
  
         ptlrpc_service_for_each_part(svcpt, i, svc) {
-               cur_timeout = at_get(&svcpt->scp_at_estimate);
+               cur_timeout = obd_at_get(NULL, &svcpt->scp_at_estimate);
                 worst_timeout = svcpt->scp_at_estimate.at_worst_timeout_ever;
                 worst_timestamp = svcpt->scp_at_estimate.at_worst_timestamp;
  
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c

index 8047bc5..e00bcb4 100644 (file)
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -104,7 +104,7 @@ static int ptl_send_buf(struct lnet_handle_md *mdh, void *base, int len,
  }
  
  #define mdunlink_iterate_helper(mds, count) \
-               __mdunlink_iterate_helper(mds, count, false) 
+               __mdunlink_iterate_helper(mds, count, false)
  static void __mdunlink_iterate_helper(struct lnet_handle_md *bd_mds,
                                       int count, bool discard)
  {
@@ -510,10 +510,16 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
         struct ptlrpc_service_part      *svcpt = req->rq_rqbd->rqbd_svcpt;
         struct ptlrpc_service           *svc = svcpt->scp_service;
         timeout_t service_timeout;
+       struct obd_device *obd = NULL;
  
+       if (req->rq_export)
+               obd = req->rq_export->exp_obd;
+
+       service_timeout = obd_at_off(obd) ?
+                         obd_timeout * 3 / 2 : obd_get_at_max(obd);
         service_timeout = clamp_t(timeout_t, ktime_get_real_seconds() -
-                                            req->rq_arrival_time.tv_sec, 1,
-                                 (AT_OFF ? obd_timeout * 3 / 2 : at_max));
+                                 req->rq_arrival_time.tv_sec, 1,
+                                 service_timeout);
          if (!(flags & PTLRPC_REPLY_EARLY) &&
              (req->rq_type != PTL_RPC_MSG_ERR) &&
              (req->rq_reqmsg != NULL) &&
@@ -523,14 +529,14 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                  /* early replies, errors and recovery requests don't count
                  * toward our service time estimate
                  */
-               timeout_t oldse = at_measured(&svcpt->scp_at_estimate,
-                                             service_timeout);
+               timeout_t oldse = obd_at_measure(obd, &svcpt->scp_at_estimate,
+                                                service_timeout);
  
                 if (oldse != 0) {
                         DEBUG_REQ(D_ADAPTTO, req,
                                   "svc %s changed estimate from %d to %d",
                                   svc->srv_name, oldse,
-                                 at_get(&svcpt->scp_at_estimate));
+                                 obd_at_get(obd, &svcpt->scp_at_estimate));
                 }
          }
          /* Report actual service time for client latency calc */
@@ -540,8 +546,7 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
          * b=15815
          */
         if (req->rq_type == PTL_RPC_MSG_ERR &&
-           (req->rq_export == NULL ||
-            req->rq_export->exp_obd->obd_recovering)) {
+           (req->rq_export == NULL || obd->obd_recovering)) {
                 lustre_msg_set_timeout(req->rq_repmsg, 0);
         } else {
                 timeout_t timeout;
@@ -550,14 +555,12 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                     (flags & PTLRPC_REPLY_EARLY) &&
                     lustre_msg_get_flags(req->rq_reqmsg) &
                     (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
-                       struct obd_device *exp_obd = req->rq_export->exp_obd;
-
                         timeout = ktime_get_real_seconds() -
                                   req->rq_arrival_time.tv_sec +
                                   min_t(timeout_t, at_extra,
-                                       exp_obd->obd_recovery_timeout / 4);
+                                       obd->obd_recovery_timeout / 4);
                 } else {
-                       timeout = at_get(&svcpt->scp_at_estimate);
+                       timeout = obd_at_get(obd, &svcpt->scp_at_estimate);
                 }
                 lustre_msg_set_timeout(req->rq_repmsg, timeout);
         }
@@ -803,11 +806,11 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
          * This check has a race with ptlrpc_connect_import_locked()
          * with low chance, don't panic, only report.
          */
-       if (!(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
+       if (!(obd_at_off(obd) || imp->imp_state != LUSTRE_IMP_FULL ||
             (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
             !(imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_AT))) {
                 DEBUG_REQ(D_HA, request, "Wrong state of import detected, AT=%d, imp=%d, msghdr=%d, conn=%d\n",
-                         AT_OFF, imp->imp_state != LUSTRE_IMP_FULL,
+                         obd_at_off(obd), imp->imp_state != LUSTRE_IMP_FULL,
                           (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT),
                           !(imp->imp_connect_data.ocd_connect_flags &
                             OBD_CONNECT_AT));
diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c

index 4cb2585..4c800c9 100644 (file)
--- a/lustre/ptlrpc/pinger.c
+++ b/lustre/ptlrpc/pinger.c
@@ -121,8 +121,9 @@ static void ptlrpc_update_next_ping(struct obd_import *imp, int soon)
  
         if (imp->imp_state == LUSTRE_IMP_DISCON) {
                 time64_t dtime = max_t(time64_t, CONNECTION_SWITCH_MIN,
-                                      AT_OFF ? 0 :
-                                      at_get(&imp->imp_at.iat_net_latency));
+                                      obd_at_off(imp->imp_obd) ? 0 :
+                                      obd_at_get(imp->imp_obd,
+                                               &imp->imp_at.iat_net_latency));
                 time = min(time, dtime);
         }
         imp->imp_next_ping = ktime_get_seconds() + time;
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index cc33cb9..ea51d80 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -642,7 +642,7 @@ static int ptlrpc_service_part_init(struct ptlrpc_service *svc,
         spin_lock_init(&svcpt->scp_at_lock);
         array = &svcpt->scp_at_array;
  
-       size = at_est2timeout(at_max);
+       size = at_est2timeout(obd_get_at_max(NULL));
         array->paa_size     = size;
         array->paa_count    = 0;
         array->paa_deadline = -1;
@@ -1287,8 +1287,12 @@ static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
         struct ptlrpc_at_array *array = &svcpt->scp_at_array;
         struct ptlrpc_request *rq = NULL;
         __u32 index;
+       struct obd_device *obd = NULL;
  
-       if (AT_OFF)
+       if (req->rq_export)
+               obd = req->rq_export->exp_obd;
+
+       if (obd_at_off(obd))
                 return(0);
  
         if (req->rq_no_reply)
@@ -1365,9 +1369,13 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
         timeout_t olddl = req->rq_deadline - ktime_get_real_seconds();
         time64_t newdl;
         int rc;
+       struct obd_device *obd = NULL;
  
         ENTRY;
  
+       if (req->rq_export)
+               obd = req->rq_export->exp_obd;
+
         if (CFS_FAIL_CHECK(OBD_FAIL_TGT_REPLAY_RECONNECT) ||
             CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_ENQ_RESEND)) {
                 /* don't send early reply */
@@ -1380,11 +1388,11 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
          */
         DEBUG_REQ(D_ADAPTTO, req,
                   "%ssending early reply (deadline %+ds, margin %+ds) for %d+%d",
-                 AT_OFF ? "AT off - not " : "",
-                 olddl, olddl - at_get(&svcpt->scp_at_estimate),
-                 at_get(&svcpt->scp_at_estimate), at_extra);
+                 obd_at_off(obd) ? "AT off - not " : "",
+                 olddl, olddl - obd_at_get(obd, &svcpt->scp_at_estimate),
+                 obd_at_get(obd, &svcpt->scp_at_estimate), at_extra);
  
-       if (AT_OFF)
+       if (obd_at_off(obd))
                 RETURN(0);
  
         if (olddl < 0) {
@@ -1435,11 +1443,11 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                  * based on this service estimate (plus some additional time to
                  * account for network latency). See ptlrpc_at_recv_early_reply
                  */
-               at_measured(&svcpt->scp_at_estimate, at_extra +
+               obd_at_measure(obd, &svcpt->scp_at_estimate, at_extra +
                             ktime_get_real_seconds() -
                             req->rq_arrival_time.tv_sec);
                 newdl = req->rq_arrival_time.tv_sec +
-                       at_get(&svcpt->scp_at_estimate);
+                       obd_at_get(obd, &svcpt->scp_at_estimate);
         }
  
         /*
@@ -1621,11 +1629,14 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt)
                  * We're already past request deadlines before we even get a
                  * chance to send early replies
                  */
+               timeout_t atg = obd_at_get((struct obd_device *)NULL,
+                                          &svcpt->scp_at_estimate);
                 LCONSOLE_WARN("'%s' is processing requests too slowly, client may timeout. Late by %ds, missed %d early replies (reqs waiting=%d active=%d, at_estimate=%d, delay=%lldms)\n",
                               svcpt->scp_service->srv_name, -first, counter,
                               svcpt->scp_nreqs_incoming,
                               svcpt->scp_nreqs_active,
-                             at_get(&svcpt->scp_at_estimate), delay_ms);
+                             atg,
+                             delay_ms);
         }
  
         /*
@@ -2252,6 +2263,7 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
         s64 timediff_usecs;
         s64 arrived_usecs;
         int fail_opc = 0;
+       struct obd_device *obd = NULL;
  
         ENTRY;
  
@@ -2259,6 +2271,9 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
         if (request == NULL)
                 RETURN(0);
  
+       if (request->rq_export)
+               obd = request->rq_export->exp_obd;
+
         if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT))
                 fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT;
         else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
@@ -2285,7 +2300,7 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
                 lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
                                     svcpt->scp_nreqs_active);
                 lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
-                                   at_get(&svcpt->scp_at_estimate));
+                                   obd_at_get(obd, &svcpt->scp_at_estimate));
         }
  
         if (likely(request->rq_export)) {
@@ -3623,6 +3638,7 @@ static int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt)
         struct ptlrpc_request *request = NULL;
         struct timespec64 right_now;
         struct timespec64 timediff;
+       struct obd_device *obd = NULL;
  
         ktime_get_real_ts64(&right_now);
  
@@ -3641,8 +3657,11 @@ static int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt)
         timediff = timespec64_sub(right_now, request->rq_arrival_time);
         spin_unlock(&svcpt->scp_req_lock);
  
+       if (request->rq_export)
+               obd = request->rq_export->exp_obd;
+
         if ((timediff.tv_sec) >
-           (AT_OFF ? obd_timeout * 3 / 2 : at_max)) {
+           (obd_at_off(obd) ? obd_timeout * 3 / 2 : obd_get_at_max(obd))) {
                 CERROR("%s: unhealthy - request has been waiting %llds\n",
                        svcpt->scp_service->srv_name, (s64)timediff.tv_sec);
                 return -1;
@@ -3669,3 +3688,15 @@ ptlrpc_service_health_check(struct ptlrpc_service *svc)
         return 0;
  }
  EXPORT_SYMBOL(ptlrpc_service_health_check);
+
+/* Returns srv_watchdog_factor * max(AT estimate, obd_timeout). */
+int ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt)
+{
+       /* the estimate counts as 0 whenever obd_at_off(NULL) is true */
+       int est = obd_at_off(NULL) ?
+                 0 : obd_at_get(NULL, &svcpt->scp_at_estimate);
+
+       /* so obd_timeout alone sets the base when the estimate is smaller */
+       return svcpt->scp_service->srv_watchdog_factor *
+              max_t(int, est, obd_timeout);
+}
diff --git a/lustre/quota/qmt_handler.c b/lustre/quota/qmt_handler.c

index 5db3dfe..b25ee34 100644 (file)
--- a/lustre/quota/qmt_handler.c
+++ b/lustre/quota/qmt_handler.c
@@ -1190,8 +1190,13 @@ static int qmt_dqacq(const struct lu_env *env, struct lu_device *ld,
         struct ldlm_lock *lock;
         int rtype, qtype;
         int rc, idx, stype;
+       struct obd_device *obd = NULL;
+
         ENTRY;
  
+       if (req->rq_export)
+               obd = req->rq_export->exp_obd;
+
         qbody = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_BODY);
         if (qbody == NULL)
                 RETURN(err_serious(-EPROTO));
@@ -1247,7 +1252,9 @@ static int qmt_dqacq(const struct lu_env *env, struct lu_device *ld,
                         timeout_t timeout;
  
                         svc = req->rq_rqbd->rqbd_svcpt;
-                       timeout = at_est2timeout(at_get(&svc->scp_at_estimate));
+                       timeout = at_est2timeout(
+                                       obd_at_get(obd, &svc->scp_at_estimate));
+
                         timeout += (ldlm_bl_timeout(lock) >> 1);
  
                         /* lock is being cancelled, prolong timeout */
diff --git a/lustre/quota/qsd_internal.h b/lustre/quota/qsd_internal.h

index b530943..d6ad898 100644 (file)
--- a/lustre/quota/qsd_internal.h
+++ b/lustre/quota/qsd_internal.h
@@ -330,9 +330,11 @@ static inline void qsd_set_edquot(struct lquota_entry *lqe, bool edquot)
   * quota space */
  static inline int qsd_wait_timeout(struct qsd_instance *qsd)
  {
+       struct obd_device *obd = qsd->qsd_dev->dd_lu_dev.ld_obd;
+
         if (qsd->qsd_timeout != 0)
                 return qsd->qsd_timeout;
-       return min_t(int, at_max / 2, obd_timeout / 2);
+       return min_t(int, obd_get_at_max(obd) / 2, obd_timeout / 2);
  }
  
  /* qsd_entry.c */
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh

index 41518f6..e83a681 100644 (file)
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -6027,6 +6027,109 @@ test_73() { #LU-3006
  }
  run_test 73 "failnode to update from mountdata properly"
  
+# LU-15246: set per-device AT tunables on $FSNAME, use fs2 as the reference
+test_74() {
+       (( $MDS1_VERSION >= $(version_code 2.15.57.16) )) ||
+               skip "need MDS version >= 2.15.57.16 for per-device timeouts"
+
+       setup
+       stack_trap "cleanup"
+
+       # Prepare fs2, share the mgs of fs
+       local FSNAME2=fs15246
+       local fs2mdsdev=$(mdsdevname 1_2)
+       local fs2ostdev=$(ostdevname 1_2)
+       local fs2mdsvdev=$(mdsvdevname 1_2)
+       local fs2ostvdev=$(ostvdevname 1_2)
+
+       add fs2mds $(mkfs_opts mds1 $fs2mdsdev) --fsname=$FSNAME2 \
+               --reformat $fs2mdsdev $fs2mdsvdev || error "add fs2mds failed"
+       add fs2ost $(mkfs_opts ost1 $fs2ostdev) --fsname=$FSNAME2 \
+               --reformat $fs2ostdev $fs2ostvdev || error "add fs2ost failed"
+
+       stack_trap "cleanup_fs2"
+
+       start fs2ost $fs2ostdev $OST_MOUNT_OPTS || error "start fs2ost failed"
+       start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS || error "start fs2mds failed"
+
+       mkdir -p $MOUNT2 || error "mkdir $MOUNT2 failed"
+       $MOUNT_CMD $MGSNID:/$FSNAME2 $MOUNT2 || error "mount $MOUNT2 failed"
+
+       echo "========== All mounted lustre fs ===================="
+       mount | grep 'type lustre'
+       echo "====================================================="
+
+       # Set and check osc/ldlm_enqueue_min
+       $LCTL set_param osc.${FSNAME}-*.ldlm_enqueue_min=99 ||
+               error "failed to set per-device adaptive parameters on client"
+       stack_trap "$LCTL set_param osc.${FSNAME}-*.ldlm_enqueue_min=0"
+
+       local ldlm_enqueue_min
+       ldlm_enqueue_min=$($LCTL get_param -n osc.${FSNAME}-*.ldlm_enqueue_min \
+                          | uniq)
+       (( $ldlm_enqueue_min == 99 )) ||
+               error "wrong ldlm_enqueue_min value for osc.${FSNAME}-*"
+
+       # Check fs2 as reference
+       ldlm_enqueue_min=$($LCTL get_param -n osc.${FSNAME2}-*.ldlm_enqueue_min\
+                          | uniq)
+       (( $ldlm_enqueue_min == 0 )) ||
+               error "wrong ldlm_enqueue_min value for osc.${FSNAME2}-*"
+
+       # Set and check obdfilter/at_min
+       do_facet ost1 $LCTL set_param obdfilter.${FSNAME}-*.at_min=1 ||
+               error "failed to set per-device adaptive parameters on ost"
+       stack_trap "do_facet ost1 $LCTL set_param obdfilter.${FSNAME}-*.at_min=0"
+
+       local at_min
+       at_min=$(do_facet ost1 $LCTL get_param -n obdfilter.${FSNAME}-*.at_min \
+               | uniq)
+       (( $at_min == 1 )) ||
+               error "wrong at_min value for obdfilter.${FSNAME}-*"
+
+       # set and check mdc/at_max
+       $LCTL set_param mdc.${FSNAME}-*.at_max=599 ||
+               error "failed to set per-device adaptive parameters on client"
+       stack_trap "$LCTL set_param mdc.${FSNAME}-*.at_max=0"
+
+       local at_max
+       at_max=$($LCTL get_param -n mdc.${FSNAME}-*.at_max | uniq)
+       (( $at_max == 599 )) ||
+               error "wrong at_max value for mdc.${FSNAME}-*"
+
+       # Check fs2 as reference
+       at_max=$($LCTL get_param -n mdc.${FSNAME2}-*.at_max | uniq)
+       (( $at_max == 0 )) ||
+               error "wrong at_max value for mdc.${FSNAME2}-*"
+
+       # Set and check mds/at_max
+       do_facet mds1 $LCTL set_param *.${FSNAME}-*.at_max=599 ||
+               error "failed to set per-device adaptive parameters on mds"
+       stack_trap "do_facet mds1 $LCTL set_param *.${FSNAME}-*.at_max=0"
+
+       local at_max
+       at_max=$(do_facet mds1 $LCTL get_param -n mdt.${FSNAME}-*.at_max | uniq)
+       (( $at_max == 599 )) ||
+               error "wrong at_max value for mdt.${FSNAME}-*"
+
+       # Set and check mgs&mgc/at_history
+       local mgs_nid=$(do_facet $SINGLEMDS $LCTL list_nids | tail -1)
+       $LCTL set_param mgc.MGC$mgs_nid.at_history=588
+       stack_trap "$LCTL set_param mgc.MGC$mgs_nid.at_history=0"
+
+       local at_history
+       at_history=$($LCTL get_param -n mgc.MGC$mgs_nid.at_history)
+       (( $at_history == 588 )) ||
+               error "wrong at_history value for mgc.MGC$mgs_nid"
+
+       do_facet mgs $LCTL set_param mgs.MGS.at_history=588
+       stack_trap "do_facet mgs $LCTL set_param mgs.MGS.at_history=0"
+       at_history=$(do_facet mgs $LCTL get_param -n mgs.MGS.at_history)
+       (( $at_history == 588 )) ||
+               error "wrong at_history value for mgs.MGS"
+}
+run_test 74 "Test per-device adaptive timeout parameters"
+
  test_75() { # LU-2374
         [[ "$MDS1_VERSION" -lt $(version_code 2.4.1) ]] &&
                 skip "Need MDS version at least 2.4.1"
@@ -6037,6 +6140,7 @@ test_75() { # LU-2374
         local opts_ost="$(mkfs_opts ost1 $(ostdevname 1)) \
                 --replace --reformat $(ostdevname 1) $(ostvdevname 1)"
  
+       load_modules
         #check with default parameters
         add mds1 $opts_mds || error "add mds1 failed for default params"
         add ost1 $opts_ost || error "add ost1 failed for default params"
author	Lei Feng <flei@whamcloud.com>
	Thu, 18 Nov 2021 00:34:46 +0000 (08:34 +0800)
committer	Oleg Drokin <green@whamcloud.com>
	Wed, 25 Oct 2023 18:04:55 +0000 (18:04 +0000)
lustre/include/lprocfs_status.h		patch \| blob \| history
lustre/include/lustre_dlm.h		patch \| blob \| history
lustre/include/lustre_import.h		patch \| blob \| history
lustre/include/lustre_net.h		patch \| blob \| history
lustre/include/obd.h		patch \| blob \| history
lustre/ldlm/ldlm_lib.c		patch \| blob \| history
lustre/ldlm/ldlm_lockd.c		patch \| blob \| history
lustre/ldlm/ldlm_request.c		patch \| blob \| history
lustre/ldlm/ldlm_resource.c		patch \| blob \| history
lustre/mdc/lproc_mdc.c		patch \| blob \| history
lustre/mdt/mdt_handler.c		patch \| blob \| history
lustre/mdt/mdt_lproc.c		patch \| blob \| history
lustre/mgc/lproc_mgc.c		patch \| blob \| history
lustre/mgc/mgc_internal.h		patch \| blob \| history
lustre/mgc/mgc_request.c		patch \| blob \| history
lustre/mgs/lproc_mgs.c		patch \| blob \| history
lustre/obdclass/class_obd.c		patch \| blob \| history
lustre/obdclass/lprocfs_status.c		patch \| blob \| history
lustre/ofd/lproc_ofd.c		patch \| blob \| history
lustre/osc/lproc_osc.c		patch \| blob \| history
lustre/osc/osc_cache.c		patch \| blob \| history
lustre/ptlrpc/client.c		patch \| blob \| history
lustre/ptlrpc/import.c		patch \| blob \| history
lustre/ptlrpc/lproc_ptlrpc.c		patch \| blob \| history
lustre/ptlrpc/niobuf.c		patch \| blob \| history
lustre/ptlrpc/pinger.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history
lustre/quota/qmt_handler.c		patch \| blob \| history
lustre/quota/qsd_internal.h		patch \| blob \| history
lustre/tests/conf-sanity.sh		patch \| blob \| history