Whamcloud - gitweb
LU-15246 ptlrpc: per-device adaptive timeout parameters 98/45598/38
author Lei Feng <flei@whamcloud.com>
Thu, 18 Nov 2021 00:34:46 +0000 (08:34 +0800)
committer Oleg Drokin <green@whamcloud.com>
Wed, 25 Oct 2023 18:04:55 +0000 (18:04 +0000)
When a client is mounting multiple filesystems with different
MGSes setting global parameters at_min, at_max, etc., then the
settings from one filesystem's MGS config will also apply to RPCs
sent for the OSC, MDC, and MGC devices on the other filesystem(s).
Typically the settings of the last filesystem to mount on the client
override the earlier values, and there is no way to separate them.

Moving the parameters to be per-device values allows them to be
set independently for each set of client devices, so that the
client can interact properly with each set of servers.  This allows
e.g. different timeouts for local and remote mounts, or for flash
and HDD filesystems that have different load and performance.

Add per-device adaptive timeout parameters that can optionally
replace global parameters of the same name:

    at_min     -> *.<fsname>*.at_min
    at_max     -> *.<fsname>*.at_max
    at_history -> *.<fsname>*.at_history
    ldlm_enqueue_min -> *.<fsname>*.ldlm_enqueue_min

These parameters should always be set with fsname in the device
name, rather than pure wildcard '*' device names, or they will be
the same as the global parameters in the end (settings from one
MGS will apply to devices on other filesystems).  That is a bug
in how "lctl set_param -P" works, but will be fixed separately.

Signed-off-by: Lei Feng <flei@whamcloud.com>
Change-Id: I5b04c9aa53a446fb5a78bfaff372b4f236c9eb8a
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/45598
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
30 files changed:
lustre/include/lprocfs_status.h
lustre/include/lustre_dlm.h
lustre/include/lustre_import.h
lustre/include/lustre_net.h
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/ldlm/ldlm_resource.c
lustre/mdc/lproc_mdc.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_lproc.c
lustre/mgc/lproc_mgc.c
lustre/mgc/mgc_internal.h
lustre/mgc/mgc_request.c
lustre/mgs/lproc_mgs.c
lustre/obdclass/class_obd.c
lustre/obdclass/lprocfs_status.c
lustre/ofd/lproc_ofd.c
lustre/osc/lproc_osc.c
lustre/osc/osc_cache.c
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/service.c
lustre/quota/qmt_handler.c
lustre/quota/qsd_internal.h
lustre/tests/conf-sanity.sh

index df9d5f8..2209bc8 100644 (file)
@@ -877,6 +877,31 @@ ssize_t lustre_attr_store(struct kobject *kobj, struct attribute *attr,
 
 extern const struct sysfs_ops lustre_sysfs_ops;
 
+#define LUSTRE_OBD_UINT_PARAM_ATTR(name)\
+static ssize_t name##_show(struct kobject *kobj, struct attribute *attr,\
+                          char *buf)                                   \
+{                                                                      \
+       int rc;                                                         \
+       struct obd_device *obd =                                        \
+               container_of(kobj, struct obd_device, obd_kset.kobj);   \
+       rc = snprintf(buf, PAGE_SIZE, "%u\n", obd->obd_##name); \
+       return rc;                                                      \
+}                                                                      \
+static ssize_t name##_store(struct kobject *kobj, struct attribute *attr,\
+                           const char *buffer, size_t count)           \
+{                                                                      \
+       int rc;                                                         \
+       unsigned int val;                                               \
+       struct obd_device *obd =                                        \
+               container_of(kobj, struct obd_device, obd_kset.kobj);   \
+       rc = kstrtouint(buffer, 10, &val);                              \
+       if (rc)                                                         \
+               return rc;                                              \
+       obd->obd_##name = val;                                          \
+       return count;                                                   \
+}                                                                      \
+LUSTRE_RW_ATTR(name)
+
 /* lproc_ptlrpc.c */
 struct ptlrpc_request;
 extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
index 33b5f0d..38776a2 100644 (file)
@@ -1808,7 +1808,6 @@ int ldlm_cli_inodebits_convert(struct ldlm_lock *lock,
 
 /** @} ldlm_cli_api */
 
-extern unsigned int ldlm_enqueue_min;
 
 /* mds/handler.c */
 /* This has to be here because recursive inclusion sucks. */
index a4df13e..b19a933 100644 (file)
@@ -90,9 +90,9 @@ struct ptlrpc_at_array {
 
 #define IMP_AT_MAX_PORTALS 8
 struct imp_at {
-        int                     iat_portal[IMP_AT_MAX_PORTALS];
-        struct adaptive_timeout iat_net_latency;
-        struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
+       int                     iat_portal[IMP_AT_MAX_PORTALS];
+       struct adaptive_timeout iat_net_latency;
+       struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
 };
 
 
@@ -252,7 +252,7 @@ struct obd_import {
        int                       imp_initiated_at;
         /** Incremented every time we send reconnection request */
         __u32                     imp_conn_cnt;
-       /** 
+       /**
         * \see ptlrpc_free_committed remembers imp_generation value here
         * after a check to save on unnecessary replay list iterations
         */
@@ -409,17 +409,9 @@ static inline void at_reinit(struct adaptive_timeout *at, timeout_t timeout,
        spin_unlock(&at->at_lock);
 }
 
-extern unsigned int at_min;
-extern unsigned int at_max;
-#define AT_OFF (at_max == 0)
+timeout_t obd_at_measure(struct obd_device *obd, struct adaptive_timeout *at,
+                           timeout_t timeout);
 
-static inline timeout_t at_get(struct adaptive_timeout *at)
-{
-       return (at->at_current_timeout > at_min) ?
-               at->at_current_timeout : at_min;
-}
-
-timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout);
 int import_at_get_index(struct obd_import *imp, int portal);
 
 /* genops.c */
index badba82..acf8b2a 100644 (file)
@@ -2585,14 +2585,7 @@ static inline int ptlrpc_no_resend(struct ptlrpc_request *req)
        return req->rq_no_resend;
 }
 
-static inline int
-ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt)
-{
-       int at = AT_OFF ? 0 : at_get(&svcpt->scp_at_estimate);
-
-       return svcpt->scp_service->srv_watchdog_factor *
-              max_t(int, at, obd_timeout);
-}
+int ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt);
 
 static inline struct ptlrpc_service *
 ptlrpc_req2svc(struct ptlrpc_request *req)
index 8b41f8b..d1759c6 100644 (file)
@@ -750,8 +750,47 @@ struct obd_device {
        struct kset                     obd_kset; /* sysfs object collection */
        struct kobj_type                obd_ktype;
        struct completion               obd_kobj_unregister;
+
+       /* adaptive timeout parameters */
+       unsigned int                    obd_at_min;
+       unsigned int                    obd_at_max;
+       unsigned int                    obd_at_history;
+       unsigned int                    obd_ldlm_enqueue_min;
 };
 
+#define obd_get_at_min(obd) ({ \
+       struct obd_device *_obd = obd; \
+       if (_obd == NULL) \
+               CDEBUG(D_RPCTRACE, "NULL obd\n"); \
+       _obd && _obd->obd_at_min ? _obd->obd_at_min : at_min; \
+})
+#define obd_get_at_max(obd) ({\
+       struct obd_device *_obd = obd; \
+       if (_obd == NULL) \
+               CDEBUG(D_RPCTRACE, "NULL obd\n"); \
+       _obd && _obd->obd_at_max ? _obd->obd_at_max : at_max; \
+})
+#define obd_get_at_history(obd) ({ \
+       struct obd_device *_obd = obd; \
+       if (_obd == NULL) \
+               CDEBUG(D_RPCTRACE, "NULL obd\n"); \
+       _obd && _obd->obd_at_history ? _obd->obd_at_history : at_history; \
+})
+extern unsigned int ldlm_enqueue_min;
+#define obd_get_ldlm_enqueue_min(obd) ({ \
+       struct obd_device *_obd = obd; \
+       if (_obd == NULL) \
+               CDEBUG(D_RPCTRACE, "NULL obd\n"); \
+       _obd && _obd->obd_ldlm_enqueue_min ? _obd->obd_ldlm_enqueue_min : \
+                                            ldlm_enqueue_min; \
+})
+#define obd_at_off(obd) (obd_get_at_max(obd) == 0)
+
+#define obd_at_get(obd, at) ({ \
+       timeout_t t1 = obd_get_at_min(obd); \
+       max_t(timeout_t, (at)->at_current_timeout, t1); \
+})
+
 int obd_uuid_add(struct obd_device *obd, struct obd_export *export);
 void obd_uuid_del(struct obd_device *obd, struct obd_export *export);
 #ifdef HAVE_SERVER_SUPPORT
index 6069fa3..07d9332 100644 (file)
@@ -2029,8 +2029,8 @@ check_and_start_recovery_timer(struct obd_device *obd,
                 * Teach server about old server's estimates, as first guess
                 * at how long new requests will take.
                 */
-               at_measured(&req->rq_rqbd->rqbd_svcpt->scp_at_estimate,
-                           service_timeout);
+               obd_at_measure(obd, &req->rq_rqbd->rqbd_svcpt->scp_at_estimate,
+                              service_timeout);
 
        target_start_recovery_timer(obd);
 
@@ -2424,12 +2424,13 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
        /* don't reset timer for final stage */
        if (!exp_finished(req->rq_export)) {
                timeout_t timeout = obd_timeout;
+               struct obd_device *obd = req->rq_export->exp_obd;
 
                /**
                 * Add request @timeout to the recovery time so next request from
                 * this client may come in recovery time
                 */
-               if (!AT_OFF) {
+               if (!obd_at_off(obd)) {
                        struct ptlrpc_service_part *svcpt;
                        timeout_t est_timeout;
 
@@ -2441,7 +2442,7 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
                         * use the maxium timeout here for waiting the client
                         * sending the next req
                         */
-                       est_timeout = at_get(&svcpt->scp_at_estimate);
+                       est_timeout = obd_at_get(obd, &svcpt->scp_at_estimate);
                        timeout = max_t(timeout_t, at_est2timeout(est_timeout),
                                        lustre_msg_get_timeout(req->rq_reqmsg));
                        /*
index d043760..2f19e7e 100644 (file)
@@ -438,7 +438,8 @@ static void ldlm_add_blocked_lock(struct ldlm_lock *lock)
 
 static int ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
 {
-       int ret;
+       struct obd_device *obd = NULL;
+       int at_off, ret;
 
        /* NB: must be called with hold of lock_res_and_lock() */
        LASSERT(ldlm_is_res_locked(lock));
@@ -448,9 +449,12 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
         * Do not put cross-MDT lock in the waiting list, since we
         * will not evict it due to timeout for now
         */
-       if (lock->l_export != NULL &&
-           (exp_connect_flags(lock->l_export) & OBD_CONNECT_MDS_MDS))
-               return 0;
+       if (lock->l_export != NULL) {
+               obd = lock->l_export->exp_obd;
+
+               if (exp_connect_flags(lock->l_export) & OBD_CONNECT_MDS_MDS)
+                       return 0;
+       }
 
        spin_lock_bh(&waiting_locks_spinlock);
        if (ldlm_is_cancel(lock)) {
@@ -484,9 +488,10 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
        if (ret)
                ldlm_add_blocked_lock(lock);
 
+       at_off = obd_at_off(obd);
        LDLM_DEBUG(lock, "%sadding to wait list(timeout: %d, AT: %s)",
                   ret == 0 ? "not re-" : "", timeout,
-                  AT_OFF ? "off" : "on");
+                  at_off ? "off" : "on");
        return ret;
 }
 
@@ -631,8 +636,9 @@ int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
 timeout_t ldlm_bl_timeout(struct ldlm_lock *lock)
 {
        timeout_t timeout;
+       struct obd_device *obd = lock->l_export->exp_obd;
 
-       if (AT_OFF)
+       if (obd_at_off(obd))
                return obd_timeout / 2;
 
        /*
@@ -641,9 +647,9 @@ timeout_t ldlm_bl_timeout(struct ldlm_lock *lock)
         * It would be nice to have some kind of "early reply" mechanism for
         * lock callbacks too...
         */
-       timeout = at_get(&lock->l_export->exp_bl_lock_at);
+       timeout = obd_at_get(obd, &lock->l_export->exp_bl_lock_at);
        return max_t(timeout_t, timeout + (timeout >> 1),
-                    (timeout_t)ldlm_enqueue_min);
+                    (timeout_t)obd_get_ldlm_enqueue_min(obd));
 }
 EXPORT_SYMBOL(ldlm_bl_timeout);
 
@@ -667,8 +673,9 @@ timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req)
 {
        struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
        timeout_t timeout, req_timeout, at_timeout, netl;
+       struct obd_device *obd = req->rq_export->exp_obd;
 
-       if (AT_OFF)
+       if (obd_at_off(obd))
                return obd_timeout / 2;
 
        /* A blocked lock means somebody in the cluster is waiting, and we
@@ -681,9 +688,11 @@ timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req)
         * Either this on the next RPC times out, take the max.
         * Considering the current RPC, take just the left time.
         */
-       netl = at_get(&req->rq_export->exp_imp_reverse->imp_at.iat_net_latency);
+       netl = obd_at_get(obd,
+                         &req->rq_export->exp_imp_reverse->imp_at.iat_net_latency);
        req_timeout = req->rq_deadline - ktime_get_real_seconds() + netl;
-       at_timeout = at_est2timeout(at_get(&svcpt->scp_at_estimate)) + netl;
+       at_timeout = at_est2timeout(obd_at_get(obd, &svcpt->scp_at_estimate))
+                                   + netl;
        req_timeout = max(req_timeout, at_timeout);
 
        /* Take 1 re-connect failure and 1 re-connect success into account. */
@@ -691,7 +700,8 @@ timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req)
 
        /* Client's timeout is calculated as at_est2timeout(), let's be a bit
         * more conservative than client */
-       return max(timeout + (timeout >> 4), (timeout_t)ldlm_enqueue_min);
+       return max(timeout + (timeout >> 4),
+                  (timeout_t)obd_get_ldlm_enqueue_min(obd));
 }
 EXPORT_SYMBOL(ldlm_bl_timeout_by_rpc);
 
@@ -936,6 +946,7 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
        struct ptlrpc_request  *req;
        int instant_cancel = 0;
        int rc = 0;
+       struct obd_device *obd;
 
        ENTRY;
 
@@ -950,7 +961,9 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
 
        LASSERT(lock);
        LASSERT(data != NULL);
-       if (lock->l_export->exp_obd->obd_recovering != 0)
+
+       obd = lock->l_export->exp_obd;
+       if (obd->obd_recovering != 0)
                LDLM_ERROR(lock, "BUG 6063: lock collide during recovery");
 
        ldlm_lock_reorder_req(lock);
@@ -1019,7 +1032,7 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
 
        req->rq_send_state = LUSTRE_IMP_FULL;
        /* ptlrpc_request_alloc_pack already set timeout */
-       if (AT_OFF)
+       if (obd_at_off(obd))
                req->rq_timeout = ldlm_get_rq_timeout();
 
        if (lock->l_export && lock->l_export->exp_nid_stats &&
@@ -1048,6 +1061,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
        int instant_cancel = 0;
        int rc = 0;
        int lvb_len;
+       struct obd_device *obd;
 
        ENTRY;
 
@@ -1059,6 +1073,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
                RETURN(0);
        }
 
+       obd = lock->l_export->exp_obd;
        req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                   &RQF_LDLM_CP_CALLBACK);
        if (req == NULL)
@@ -1120,7 +1135,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
 
        req->rq_send_state = LUSTRE_IMP_FULL;
        /* ptlrpc_request_pack already set timeout */
-       if (AT_OFF)
+       if (obd_at_off(obd))
                req->rq_timeout = ldlm_get_rq_timeout();
 
        /* We only send real blocking ASTs after the lock is granted */
@@ -1182,6 +1197,7 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
        struct ldlm_cb_async_args *ca;
        int rc;
        struct req_format *req_fmt;
+       struct obd_device *obd = lock->l_export->exp_obd;
 
        ENTRY;
 
@@ -1223,7 +1239,7 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
 
        req->rq_send_state = LUSTRE_IMP_FULL;
        /* ptlrpc_request_alloc_pack already set timeout */
-       if (AT_OFF)
+       if (obd_at_off(obd))
                req->rq_timeout = ldlm_get_rq_timeout();
 
        req->rq_interpret_reply = ldlm_cb_interpret;
@@ -1815,7 +1831,9 @@ int ldlm_request_cancel(struct ptlrpc_request *req,
                        LDLM_DEBUG(lock,
                                   "server cancels blocked lock after %ds",
                                   delay);
-                       at_measured(&lock->l_export->exp_bl_lock_at, delay);
+                       obd_at_measure(lock->l_export->exp_obd,
+                                      &lock->l_export->exp_bl_lock_at,
+                                      delay);
                }
                ldlm_lock_cancel(lock);
                LDLM_LOCK_PUT(lock);
index 46ada0a..6fea124 100644 (file)
@@ -174,8 +174,11 @@ EXPORT_SYMBOL(is_granted_or_cancelled_nolock);
 static timeout_t ldlm_cp_timeout(struct ldlm_lock *lock)
 {
        timeout_t timeout;
+       struct obd_device *obd;
+
+       obd = class_exp2obd(lock->l_conn_export);
 
-       if (AT_OFF)
+       if (obd_at_off(obd))
                return obd_timeout;
 
        /*
@@ -183,8 +186,8 @@ static timeout_t ldlm_cp_timeout(struct ldlm_lock *lock)
         * lock from another client.  Server will evict the other client if it
         * doesn't respond reasonably, and then give us the lock.
         */
-       timeout = at_get(ldlm_lock_to_ns_at(lock));
-       return max(3 * timeout, (timeout_t)ldlm_enqueue_min);
+       timeout = obd_at_get(obd, ldlm_lock_to_ns_at(lock));
+       return max(3 * timeout, (timeout_t)obd_get_ldlm_enqueue_min(obd));
 }
 
 /**
@@ -201,6 +204,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock, void *data)
        } else if (data == NULL) {
                LDLM_DEBUG(lock, "client-side enqueue: granted");
        } else {
+               struct obd_device *obd = class_exp2obd(lock->l_conn_export);
                /* Take into AT only CP RPC, not immediately granted locks */
                timeout_t delay = 0;
 
@@ -213,7 +217,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock, void *data)
                LDLM_DEBUG(lock, "client-side enqueue: granted after %ds",
                           delay);
                /* Update our time estimate */
-               at_measured(ldlm_lock_to_ns_at(lock), delay);
+               obd_at_measure(obd, ldlm_lock_to_ns_at(lock), delay);
        }
        return result;
 }
index e41ef8a..30cde96 100644 (file)
@@ -940,7 +940,7 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
        for (idx = 0; idx < (1 << ns->ns_bucket_bits); idx++) {
                struct ldlm_ns_bucket *nsb = &ns->ns_rs_buckets[idx];
 
-               at_init(&nsb->nsb_at_estimate, ldlm_enqueue_min, 0);
+               at_init(&nsb->nsb_at_estimate, obd_get_ldlm_enqueue_min(obd), 0);
                nsb->nsb_namespace = ns;
                nsb->nsb_reclaim_start = 0;
                atomic_set(&nsb->nsb_count, 0);
index d813b2b..239c859 100644 (file)
@@ -789,6 +789,10 @@ static ssize_t grant_shrink_interval_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(grant_shrink_interval);
 
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
 static struct attribute *mdc_attrs[] = {
        &lustre_attr_active.attr,
        &lustre_attr_checksums.attr,
@@ -802,6 +806,9 @@ static struct attribute *mdc_attrs[] = {
        &lustre_attr_grant_shrink_interval.attr,
        &lustre_attr_cur_lost_grant_bytes.attr,
        &lustre_attr_cur_dirty_grant_bytes.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_history.attr,
        NULL,
 };
 
index 293420f..deb35aa 100644 (file)
@@ -493,15 +493,16 @@ static int mdt_statfs(struct tgt_session_info *tsi)
        ktime_t kstart = ktime_get();
        int current_blockbits;
        int rc;
+       timeout_t at_est;
 
        ENTRY;
 
        svcpt = req->rq_rqbd->rqbd_svcpt;
 
        /* This will trigger a watchdog timeout */
+       at_est = obd_at_get(mdt->mdt_lu_dev.ld_obd, &svcpt->scp_at_estimate);
        CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP,
-                        (MDT_SERVICE_WATCHDOG_FACTOR *
-                         at_get(&svcpt->scp_at_estimate)) + 1);
+                        (MDT_SERVICE_WATCHDOG_FACTOR * at_est) + 1);
 
        rc = mdt_check_ucred(info);
        if (rc)
index 61eabd6..39450f3 100644 (file)
@@ -1651,6 +1651,11 @@ LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RW_ATTR(grant_check_threshold);
 LUSTRE_RO_ATTR(eviction_count);
 
+/* per-device at parameters */
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
 static struct attribute *mdt_attrs[] = {
        &lustre_attr_tot_dirty.attr,
        &lustre_attr_tot_granted.attr,
@@ -1697,6 +1702,9 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_enable_remote_subdir_mount.attr,
        &lustre_attr_max_mod_rpcs_in_flight.attr,
        &lustre_attr_enable_dmv_implicit_inherit.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_history.attr,
        NULL,
 };
 
index b965186..5f1dbbf 100644 (file)
@@ -108,11 +108,18 @@ static ssize_t dynamic_nids_store(struct kobject *kobj, struct attribute *attr,
 
 LUSTRE_RW_ATTR(dynamic_nids);
 
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
 static struct attribute *mgc_attrs[] = {
        &lustre_attr_mgs_conn_uuid.attr,
        &lustre_attr_conn_uuid.attr,
        &lustre_attr_ping.attr,
        &lustre_attr_dynamic_nids.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_history.attr,
        NULL,
 };
 
index 291c78d..022320c 100644 (file)
@@ -105,8 +105,9 @@ int mgc_barrier_glimpse_ast(struct ldlm_lock *lock, void *data);
 /* This is the timeout value for MGS_CONNECT request plus a ping interval, such
  * that we can have a chance to try the secondary MGS if any.
  */
-#define  MGC_ENQUEUE_LIMIT (INITIAL_CONNECT_TIMEOUT + (AT_OFF ? 0 : at_min) \
-                               + PING_INTERVAL)
+#define  MGC_ENQUEUE_LIMIT(obd) (INITIAL_CONNECT_TIMEOUT + \
+                                (obd_at_off(obd) ? 0 : obd_get_at_min(obd)) + \
+                                PING_INTERVAL)
 #define  MGC_TARGET_REG_LIMIT 10
 #define  MGC_TARGET_REG_LIMIT_MAX RECONNECT_DELAY_MAX
 #define  MGC_SEND_PARAM_LIMIT 10
index 134a40e..2efb6c1 100644 (file)
@@ -992,9 +992,9 @@ static int mgc_enqueue(struct obd_export *exp, enum ldlm_type type,
            IS_SERVER(s2lsi(cld->cld_cfg.cfg_sb)))
                short_limit = 1;
 
-        /* Limit how long we will wait for the enqueue to complete */
-        req->rq_delay_limit = short_limit ? 5 : MGC_ENQUEUE_LIMIT;
-        rc = ldlm_cli_enqueue(exp, &req, &einfo, &cld->cld_resid, NULL, flags,
+       /* Limit how long we will wait for the enqueue to complete */
+       req->rq_delay_limit = short_limit ? 5 : MGC_ENQUEUE_LIMIT(exp->exp_obd);
+       rc = ldlm_cli_enqueue(exp, &req, &einfo, &cld->cld_resid, NULL, flags,
                              NULL, 0, LVB_T_NONE, lockh, 0);
         /* A failed enqueue should still call the mgc_blocking_ast,
            where it will be requeued if needed ("grant failed"). */
index b36cef0..3d58463 100644 (file)
@@ -250,11 +250,18 @@ static ssize_t mntdev_show(struct kobject *kobj, struct attribute *attr,
 }
 LUSTRE_RO_ATTR(mntdev);
 
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
 static struct attribute *mgs_attrs[] = {
        &lustre_attr_fstype.attr,
        &lustre_attr_mntdev.attr,
        &lustre_attr_eviction_count.attr,
        &lustre_attr_num_exports.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_history.attr,
        NULL,
 };
 
index 8a1165e..f7270f8 100644 (file)
@@ -89,6 +89,7 @@ EXPORT_SYMBOL(ldlm_timeout_set);
 /* bulk transfer timeout, give up after 100s by default */
 unsigned int bulk_timeout = 100; /* seconds */
 EXPORT_SYMBOL(bulk_timeout);
+
 /* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */
 unsigned int at_min = 5;
 EXPORT_SYMBOL(at_min);
index 4f09a04..730a919 100644 (file)
@@ -875,13 +875,14 @@ static void lprocfs_import_seq_show_locked(struct seq_file *m,
                if (imp->imp_at.iat_portal[j] == 0)
                        break;
                k = max_t(unsigned int, k,
-                         at_get(&imp->imp_at.iat_service_estimate[j]));
+                         obd_at_get(imp->imp_obd,
+                                    &imp->imp_at.iat_service_estimate[j]));
        }
        seq_printf(m, "    service_estimates:\n"
                   "       services: %u sec\n"
                   "       network: %d sec\n",
                   k,
-                  at_get(&imp->imp_at.iat_net_latency));
+                  obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency));
 
        seq_printf(m, "    transactions:\n"
                   "       last_replay: %llu\n"
@@ -987,7 +988,7 @@ static void lprocfs_timeouts_seq_show_locked(struct seq_file *m,
                   "last reply", (s64)imp->imp_last_reply_time,
                   (s64)(now - imp->imp_last_reply_time));
 
-       cur_timeout = at_get(&imp->imp_at.iat_net_latency);
+       cur_timeout = obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency);
        worst_timeout = imp->imp_at.iat_net_latency.at_worst_timeout_ever;
        worst_timestamp = imp->imp_at.iat_net_latency.at_worst_timestamp;
        seq_printf(m, "%-10s : cur %3u  worst %3u (at %lld, %llds ago) ",
@@ -1002,7 +1003,7 @@ static void lprocfs_timeouts_seq_show_locked(struct seq_file *m,
                        break;
 
                service_est = &imp->imp_at.iat_service_estimate[i];
-               cur_timeout = at_get(service_est);
+               cur_timeout = obd_at_get(imp->imp_obd, service_est);
                worst_timeout = service_est->at_worst_timeout_ever;
                worst_timestamp = service_est->at_worst_timestamp;
                seq_printf(m, "portal %-2d  : cur %3u  worst %3u (at %lld, %llds ago) ",
index 8ddb93f..79091c4 100644 (file)
@@ -988,6 +988,10 @@ void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset,
 
 LPROC_SEQ_FOPS(lprocfs_nid_stats_clear);
 
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+
 static struct attribute *ofd_attrs[] = {
        &lustre_attr_tot_dirty.attr,
        &lustre_attr_tot_granted.attr,
@@ -1017,6 +1021,9 @@ static struct attribute *ofd_attrs[] = {
        &lustre_attr_access_log_size.attr,
        &lustre_attr_job_cleanup_interval.attr,
        &lustre_attr_checksum_t10pi_enforce.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_history.attr,
        NULL,
 };
 
index f7f8e3a..21e27f6 100644 (file)
@@ -862,6 +862,11 @@ static int lprocfs_osc_attach_seqstat(struct obd_device *obd)
 }
 #endif /* CONFIG_PROC_FS */
 
+LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
+LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
+LUSTRE_OBD_UINT_PARAM_ATTR(ldlm_enqueue_min);
+
 static struct attribute *osc_attrs[] = {
        &lustre_attr_active.attr,
        &lustre_attr_checksums.attr,
@@ -881,6 +886,10 @@ static struct attribute *osc_attrs[] = {
        &lustre_attr_idle_timeout.attr,
        &lustre_attr_idle_connect.attr,
        &lustre_attr_grant_shrink.attr,
+       &lustre_attr_at_max.attr,
+       &lustre_attr_at_min.attr,
+       &lustre_attr_at_history.attr,
+       &lustre_attr_ldlm_enqueue_min.attr,
        NULL,
 };
 
index b73b0e0..a40e0b4 100644 (file)
@@ -1554,6 +1554,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
        int rc = -EDQUOT;
        int remain;
        bool entered = false;
+       struct obd_device *obd = cli->cl_import->imp_obd;
        /* We cannot wait for a long time here since we are holding ldlm lock
         * across the actual IO. If no requests complete fast (e.g. due to
         * overloaded OST that takes a long time to process everything, we'd
@@ -1562,8 +1563,10 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
         * evicted by server which is half obd_timeout when AT is off
         * or at least ldlm_enqueue_min with AT on.
         * See LU-13131 */
-       unsigned long timeout = cfs_time_seconds(AT_OFF ? obd_timeout / 2 :
-                                                         ldlm_enqueue_min / 2);
+       unsigned long timeout =
+               cfs_time_seconds(obd_at_off(obd) ?
+                                obd_timeout / 2 :
+                                obd_get_ldlm_enqueue_min(obd) / 2);
 
        ENTRY;
 
index 57e012c..f0e1546 100644 (file)
@@ -314,9 +314,12 @@ EXPORT_SYMBOL(ptlrpc_free_bulk);
  */
 void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
 {
+       struct obd_device *obd;
+
        LASSERT(req->rq_import);
+       obd = req->rq_import->imp_obd;
 
-       if (AT_OFF) {
+       if (obd_at_off(obd)) {
                /* non-AT settings */
                /**
                 * \a imp_server_timeout means this is reverse import and
@@ -334,7 +337,7 @@ void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
 
                idx = import_at_get_index(req->rq_import,
                                          req->rq_request_portal);
-               serv_est = at_get(&at->iat_service_estimate[idx]);
+               serv_est = obd_at_get(obd, &at->iat_service_estimate[idx]);
                /*
                 * Currently a 32 bit value is sent over the
                 * wire for rq_timeout so please don't change this
@@ -361,8 +364,10 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
        int idx;
        timeout_t oldse;
        struct imp_at *at;
+       struct obd_device *obd;
 
        LASSERT(req->rq_import);
+       obd = req->rq_import->imp_obd;
        at = &req->rq_import->imp_at;
 
        idx = import_at_get_index(req->rq_import, req->rq_request_portal);
@@ -370,19 +375,28 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
         * max service estimates are tracked on the server side,
         * so just keep minimal history here
         */
-       oldse = at_measured(&at->iat_service_estimate[idx], serv_est);
-       if (oldse != 0)
+       oldse = obd_at_measure(obd, &at->iat_service_estimate[idx], serv_est);
+       if (oldse != 0) {
+               unsigned int at_est = obd_at_get(obd,
+                                               &at->iat_service_estimate[idx]);
                CDEBUG(D_ADAPTTO,
                       "The RPC service estimate for %s ptl %d has changed from %d to %d\n",
                       req->rq_import->imp_obd->obd_name,
                       req->rq_request_portal,
-                      oldse, at_get(&at->iat_service_estimate[idx]));
+                      oldse, at_est);
+       }
 }
 
 /* Expected network latency per remote node (secs) */
 int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
 {
-       return AT_OFF ? 0 : at_get(&req->rq_import->imp_at.iat_net_latency);
+       struct obd_device *obd = NULL;
+
+       if (req->rq_import)
+               obd = req->rq_import->imp_obd;
+
+       return obd_at_off(obd) ?
+              0 : obd_at_get(obd, &req->rq_import->imp_at.iat_net_latency);
 }
 
 /* Adjust expected network latency */
@@ -393,8 +407,10 @@ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
        struct imp_at *at;
        timeout_t oldnl;
        timeout_t nl;
+       struct obd_device *obd;
 
        LASSERT(req->rq_import);
+       obd = req->rq_import->imp_obd;
 
        if (service_timeout > now - req->rq_sent + 3) {
                /*
@@ -418,13 +434,16 @@ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
        nl = max_t(timeout_t, now - req->rq_sent - service_timeout, 0) + 1;
        at = &req->rq_import->imp_at;
 
-       oldnl = at_measured(&at->iat_net_latency, nl);
-       if (oldnl != 0)
+       oldnl = obd_at_measure(obd, &at->iat_net_latency, nl);
+       if (oldnl != 0) {
+               timeout_t timeout = obd_at_get(obd, &at->iat_net_latency);
+
                CDEBUG(D_ADAPTTO,
                       "The network latency for %s (nid %s) has changed from %d to %d\n",
                       req->rq_import->imp_obd->obd_name,
                       obd_uuid2str(&req->rq_import->imp_connection->c_remote_uuid),
-                      oldnl, at_get(&at->iat_net_latency));
+                      oldnl, timeout);
+       }
 }
 
 static int unpack_reply(struct ptlrpc_request *req)
index 1145862..f63eaf3 100644 (file)
@@ -569,16 +569,18 @@ static int import_select_connection(struct obd_import *imp)
         */
        if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
                struct adaptive_timeout *at = &imp->imp_at.iat_net_latency;
+               timeout_t timeout = obd_at_get(imp->imp_obd, at);
 
-               if (at_get(at) < CONNECTION_SWITCH_MAX) {
-                       at_measured(at, at_get(at) + CONNECTION_SWITCH_INC);
-                       if (at_get(at) > CONNECTION_SWITCH_MAX)
+               if (timeout < CONNECTION_SWITCH_MAX) {
+                       obd_at_measure(imp->imp_obd, at,
+                                      timeout + CONNECTION_SWITCH_INC);
+                       if (timeout > CONNECTION_SWITCH_MAX)
                                at_reset(at, CONNECTION_SWITCH_MAX);
                }
                LASSERT(imp_conn->oic_last_attempt);
                CDEBUG(D_HA,
                       "%s: tried all connections, increasing latency to %ds\n",
-                      imp->imp_obd->obd_name, at_get(at));
+                      imp->imp_obd->obd_name, timeout);
        }
 
        imp_conn->oic_last_attempt = ktime_get_seconds();
@@ -1497,7 +1499,7 @@ static int signal_completed_replay(struct obd_import *imp)
        req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
        lustre_msg_add_flags(req->rq_reqmsg,
                             MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE);
-       if (AT_OFF)
+       if (obd_at_off(imp->imp_obd))
                req->rq_timeout *= 3;
        req->rq_interpret_reply = completed_replay_interpret;
 
@@ -1731,7 +1733,7 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
                long timeout_jiffies;
                time64_t timeout;
 
-               if (AT_OFF) {
+               if (obd_at_off(imp->imp_obd)) {
                        if (imp->imp_server_timeout)
                                timeout = obd_timeout >> 1;
                        else
@@ -1742,7 +1744,8 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
 
                        req_portal = imp->imp_client->cli_request_portal;
                        idx = import_at_get_index(imp, req_portal);
-                       timeout = at_get(&imp->imp_at.iat_service_estimate[idx]);
+                       timeout = obd_at_get(imp->imp_obd,
+                                       &imp->imp_at.iat_service_estimate[idx]);
                }
 
                timeout_jiffies = cfs_time_seconds(timeout);
@@ -1935,57 +1938,60 @@ void ptlrpc_cleanup_imp(struct obd_import *imp)
  *    but still smoothing out a return to normalcy from a slow response.
  *  - (E.g. remember the maximum latency in each minute of the last 4 minutes.)
  */
-timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout)
+timeout_t obd_at_measure(struct obd_device *obd, struct adaptive_timeout *at,
+                           timeout_t timeout)
 {
+       unsigned int l_at_min = obd_get_at_min(obd);
+       unsigned int l_at_max = obd_get_at_max(obd);
        timeout_t old_timeout = at->at_current_timeout;
        time64_t now = ktime_get_real_seconds();
-       long binlimit = max_t(long, at_history / AT_BINS, 1);
+       long binlimit = max_t(long, obd_get_at_history(obd) / AT_BINS, 1);
 
-        LASSERT(at);
+       LASSERT(at);
        CDEBUG(D_OTHER, "add %u to %p time=%lld v=%u (%u %u %u %u)\n",
               timeout, at, now - at->at_binstart, at->at_current_timeout,
-               at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
+              at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
 
        if (timeout <= 0)
                /* Negative timeouts and 0's don't count, because we never
                 * want our timeout to drop to 0 or below, and because 0 could
                 * mean an error
                 */
-                return 0;
+               return 0;
 
        spin_lock(&at->at_lock);
 
-        if (unlikely(at->at_binstart == 0)) {
-                /* Special case to remove default from history */
+       if (unlikely(at->at_binstart == 0)) {
+               /* Special case to remove default from history */
                at->at_current_timeout = timeout;
                at->at_worst_timeout_ever = timeout;
                at->at_worst_timestamp = now;
                at->at_hist[0] = timeout;
-                at->at_binstart = now;
-        } else if (now - at->at_binstart < binlimit ) {
-                /* in bin 0 */
+               at->at_binstart = now;
+       } else if (now - at->at_binstart < binlimit) {
+               /* in bin 0 */
                at->at_hist[0] = max_t(timeout_t, timeout, at->at_hist[0]);
                at->at_current_timeout = max_t(timeout_t, timeout,
                                               at->at_current_timeout);
         } else {
-                int i, shift;
+               int i, shift;
                timeout_t maxv = timeout;
 
                /* move bins over */
                shift = (u32)(now - at->at_binstart) / binlimit;
-                LASSERT(shift > 0);
-                for(i = AT_BINS - 1; i >= 0; i--) {
-                        if (i >= shift) {
-                                at->at_hist[i] = at->at_hist[i - shift];
+               LASSERT(shift > 0);
+               for (i = AT_BINS - 1; i >= 0; i--) {
+                       if (i >= shift) {
+                               at->at_hist[i] = at->at_hist[i - shift];
                                maxv = max_t(timeout_t, maxv, at->at_hist[i]);
-                        } else {
-                                at->at_hist[i] = 0;
-                        }
-                }
+                       } else {
+                               at->at_hist[i] = 0;
+                       }
+               }
                at->at_hist[0] = timeout;
                at->at_current_timeout = maxv;
-                at->at_binstart += shift * binlimit;
-        }
+               at->at_binstart += shift * binlimit;
+       }
 
        if (at->at_current_timeout > at->at_worst_timeout_ever) {
                at->at_worst_timeout_ever = at->at_current_timeout;
@@ -1993,23 +1999,24 @@ timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout)
        }
 
        if (at->at_flags & AT_FLG_NOHIST)
-                /* Only keep last reported val; keeping the rest of the history
+               /* Only keep last reported val; keeping the rest of the history
                 * for debugfs only
                 */
                at->at_current_timeout = timeout;
 
-        if (at_max > 0)
+       if (l_at_max > 0)
                at->at_current_timeout = min_t(timeout_t,
-                                              at->at_current_timeout, at_max);
+                                              at->at_current_timeout,
+                                              l_at_max);
        at->at_current_timeout = max_t(timeout_t, at->at_current_timeout,
-                                      at_min);
+                                      l_at_min);
        if (at->at_current_timeout != old_timeout)
                CDEBUG(D_OTHER,
                       "AT %p change: old=%u new=%u delta=%d (val=%d) hist %u %u %u %u\n",
                       at, old_timeout, at->at_current_timeout,
                       at->at_current_timeout - old_timeout, timeout,
-                       at->at_hist[0], at->at_hist[1], at->at_hist[2],
-                       at->at_hist[3]);
+                      at->at_hist[0], at->at_hist[1], at->at_hist[2],
+                      at->at_hist[3]);
 
        /* if we changed, report the old timeout value */
        old_timeout = (at->at_current_timeout != old_timeout) ? old_timeout : 0;
index d34bcc3..7503271 100644 (file)
@@ -1116,14 +1116,14 @@ static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n)
        timeout_t worst_timeout;
        int i;
 
-       if (AT_OFF) {
+       if (obd_at_off(NULL)) {
                seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n",
                           obd_timeout);
                return 0;
        }
 
        ptlrpc_service_for_each_part(svcpt, i, svc) {
-               cur_timeout = at_get(&svcpt->scp_at_estimate);
+               cur_timeout = obd_at_get(NULL, &svcpt->scp_at_estimate);
                worst_timeout = svcpt->scp_at_estimate.at_worst_timeout_ever;
                worst_timestamp = svcpt->scp_at_estimate.at_worst_timestamp;
 
index 8047bc5..e00bcb4 100644 (file)
@@ -104,7 +104,7 @@ static int ptl_send_buf(struct lnet_handle_md *mdh, void *base, int len,
 }
 
 #define mdunlink_iterate_helper(mds, count) \
-               __mdunlink_iterate_helper(mds, count, false) 
+               __mdunlink_iterate_helper(mds, count, false)
 static void __mdunlink_iterate_helper(struct lnet_handle_md *bd_mds,
                                      int count, bool discard)
 {
@@ -510,10 +510,16 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
        struct ptlrpc_service_part      *svcpt = req->rq_rqbd->rqbd_svcpt;
        struct ptlrpc_service           *svc = svcpt->scp_service;
        timeout_t service_timeout;
+       struct obd_device *obd = NULL;
 
+       if (req->rq_export)
+               obd = req->rq_export->exp_obd;
+
+       service_timeout = obd_at_off(obd) ?
+                         obd_timeout * 3 / 2 : obd_get_at_max(obd);
        service_timeout = clamp_t(timeout_t, ktime_get_real_seconds() -
-                                            req->rq_arrival_time.tv_sec, 1,
-                                 (AT_OFF ? obd_timeout * 3 / 2 : at_max));
+                                 req->rq_arrival_time.tv_sec, 1,
+                                 service_timeout);
         if (!(flags & PTLRPC_REPLY_EARLY) &&
             (req->rq_type != PTL_RPC_MSG_ERR) &&
             (req->rq_reqmsg != NULL) &&
@@ -523,14 +529,14 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                 /* early replies, errors and recovery requests don't count
                 * toward our service time estimate
                 */
-               timeout_t oldse = at_measured(&svcpt->scp_at_estimate,
-                                             service_timeout);
+               timeout_t oldse = obd_at_measure(obd, &svcpt->scp_at_estimate,
+                                                service_timeout);
 
                if (oldse != 0) {
                        DEBUG_REQ(D_ADAPTTO, req,
                                  "svc %s changed estimate from %d to %d",
                                  svc->srv_name, oldse,
-                                 at_get(&svcpt->scp_at_estimate));
+                                 obd_at_get(obd, &svcpt->scp_at_estimate));
                }
         }
         /* Report actual service time for client latency calc */
@@ -540,8 +546,7 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
         * b=15815
         */
        if (req->rq_type == PTL_RPC_MSG_ERR &&
-           (req->rq_export == NULL ||
-            req->rq_export->exp_obd->obd_recovering)) {
+           (req->rq_export == NULL || obd->obd_recovering)) {
                lustre_msg_set_timeout(req->rq_repmsg, 0);
        } else {
                timeout_t timeout;
@@ -550,14 +555,12 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                    (flags & PTLRPC_REPLY_EARLY) &&
                    lustre_msg_get_flags(req->rq_reqmsg) &
                    (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
-                       struct obd_device *exp_obd = req->rq_export->exp_obd;
-
                        timeout = ktime_get_real_seconds() -
                                  req->rq_arrival_time.tv_sec +
                                  min_t(timeout_t, at_extra,
-                                       exp_obd->obd_recovery_timeout / 4);
+                                       obd->obd_recovery_timeout / 4);
                } else {
-                       timeout = at_get(&svcpt->scp_at_estimate);
+                       timeout = obd_at_get(obd, &svcpt->scp_at_estimate);
                }
                lustre_msg_set_timeout(req->rq_repmsg, timeout);
        }
@@ -803,11 +806,11 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         * This check has a race with ptlrpc_connect_import_locked()
         * with low chance, don't panic, only report.
         */
-       if (!(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
+       if (!(obd_at_off(obd) || imp->imp_state != LUSTRE_IMP_FULL ||
            (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
            !(imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_AT))) {
                DEBUG_REQ(D_HA, request, "Wrong state of import detected, AT=%d, imp=%d, msghdr=%d, conn=%d\n",
-                         AT_OFF, imp->imp_state != LUSTRE_IMP_FULL,
+                         obd_at_off(obd), imp->imp_state != LUSTRE_IMP_FULL,
                          (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT),
                          !(imp->imp_connect_data.ocd_connect_flags &
                            OBD_CONNECT_AT));
index 4cb2585..4c800c9 100644 (file)
@@ -121,8 +121,9 @@ static void ptlrpc_update_next_ping(struct obd_import *imp, int soon)
 
        if (imp->imp_state == LUSTRE_IMP_DISCON) {
                time64_t dtime = max_t(time64_t, CONNECTION_SWITCH_MIN,
-                                      AT_OFF ? 0 :
-                                      at_get(&imp->imp_at.iat_net_latency));
+                                      obd_at_off(imp->imp_obd) ? 0 :
+                                      obd_at_get(imp->imp_obd,
+                                               &imp->imp_at.iat_net_latency));
                time = min(time, dtime);
        }
        imp->imp_next_ping = ktime_get_seconds() + time;
index cc33cb9..ea51d80 100644 (file)
@@ -642,7 +642,7 @@ static int ptlrpc_service_part_init(struct ptlrpc_service *svc,
        spin_lock_init(&svcpt->scp_at_lock);
        array = &svcpt->scp_at_array;
 
-       size = at_est2timeout(at_max);
+       size = at_est2timeout(obd_get_at_max(NULL));
        array->paa_size     = size;
        array->paa_count    = 0;
        array->paa_deadline = -1;
@@ -1287,8 +1287,12 @@ static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
        struct ptlrpc_at_array *array = &svcpt->scp_at_array;
        struct ptlrpc_request *rq = NULL;
        __u32 index;
+       struct obd_device *obd = NULL;
 
-       if (AT_OFF)
+       if (req->rq_export)
+               obd = req->rq_export->exp_obd;
+
+       if (obd_at_off(obd))
                return(0);
 
        if (req->rq_no_reply)
@@ -1365,9 +1369,13 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
        timeout_t olddl = req->rq_deadline - ktime_get_real_seconds();
        time64_t newdl;
        int rc;
+       struct obd_device *obd = NULL;
 
        ENTRY;
 
+       if (req->rq_export)
+               obd = req->rq_export->exp_obd;
+
        if (CFS_FAIL_CHECK(OBD_FAIL_TGT_REPLAY_RECONNECT) ||
            CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_ENQ_RESEND)) {
                /* don't send early reply */
@@ -1380,11 +1388,11 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
         */
        DEBUG_REQ(D_ADAPTTO, req,
                  "%ssending early reply (deadline %+ds, margin %+ds) for %d+%d",
-                 AT_OFF ? "AT off - not " : "",
-                 olddl, olddl - at_get(&svcpt->scp_at_estimate),
-                 at_get(&svcpt->scp_at_estimate), at_extra);
+                 obd_at_off(obd) ? "AT off - not " : "",
+                 olddl, olddl - obd_at_get(obd, &svcpt->scp_at_estimate),
+                 obd_at_get(obd, &svcpt->scp_at_estimate), at_extra);
 
-       if (AT_OFF)
+       if (obd_at_off(obd))
                RETURN(0);
 
        if (olddl < 0) {
@@ -1435,11 +1443,11 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                 * based on this service estimate (plus some additional time to
                 * account for network latency). See ptlrpc_at_recv_early_reply
                 */
-               at_measured(&svcpt->scp_at_estimate, at_extra +
+               obd_at_measure(obd, &svcpt->scp_at_estimate, at_extra +
                            ktime_get_real_seconds() -
                            req->rq_arrival_time.tv_sec);
                newdl = req->rq_arrival_time.tv_sec +
-                       at_get(&svcpt->scp_at_estimate);
+                       obd_at_get(obd, &svcpt->scp_at_estimate);
        }
 
        /*
@@ -1621,11 +1629,14 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt)
                 * We're already past request deadlines before we even get a
                 * chance to send early replies
                 */
+               timeout_t atg = obd_at_get((struct obd_device *)NULL,
+                                          &svcpt->scp_at_estimate);
                LCONSOLE_WARN("'%s' is processing requests too slowly, client may timeout. Late by %ds, missed %d early replies (reqs waiting=%d active=%d, at_estimate=%d, delay=%lldms)\n",
                              svcpt->scp_service->srv_name, -first, counter,
                              svcpt->scp_nreqs_incoming,
                              svcpt->scp_nreqs_active,
-                             at_get(&svcpt->scp_at_estimate), delay_ms);
+                             atg,
+                             delay_ms);
        }
 
        /*
@@ -2252,6 +2263,7 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
        s64 timediff_usecs;
        s64 arrived_usecs;
        int fail_opc = 0;
+       struct obd_device *obd = NULL;
 
        ENTRY;
 
@@ -2259,6 +2271,9 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
        if (request == NULL)
                RETURN(0);
 
+       if (request->rq_export)
+               obd = request->rq_export->exp_obd;
+
        if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT))
                fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT;
        else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
@@ -2285,7 +2300,7 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
                lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
                                    svcpt->scp_nreqs_active);
                lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
-                                   at_get(&svcpt->scp_at_estimate));
+                                   obd_at_get(obd, &svcpt->scp_at_estimate));
        }
 
        if (likely(request->rq_export)) {
@@ -3623,6 +3638,7 @@ static int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt)
        struct ptlrpc_request *request = NULL;
        struct timespec64 right_now;
        struct timespec64 timediff;
+       struct obd_device *obd = NULL;
 
        ktime_get_real_ts64(&right_now);
 
@@ -3641,8 +3657,11 @@ static int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt)
        timediff = timespec64_sub(right_now, request->rq_arrival_time);
        spin_unlock(&svcpt->scp_req_lock);
 
+       if (request->rq_export)
+               obd = request->rq_export->exp_obd;
+
        if ((timediff.tv_sec) >
-           (AT_OFF ? obd_timeout * 3 / 2 : at_max)) {
+           (obd_at_off(obd) ? obd_timeout * 3 / 2 : obd_get_at_max(obd))) {
                CERROR("%s: unhealthy - request has been waiting %llds\n",
                       svcpt->scp_service->srv_name, (s64)timediff.tv_sec);
                return -1;
@@ -3669,3 +3688,15 @@ ptlrpc_service_health_check(struct ptlrpc_service *svc)
        return 0;
 }
 EXPORT_SYMBOL(ptlrpc_service_health_check);
+
+int
+ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt)
+{
+       /* the service estimate only counts while adaptive timeouts are on */
+       int at = obd_at_off(NULL) ?
+                0 : obd_at_get(NULL, &svcpt->scp_at_estimate);
+
+       /* the longer of the two, scaled by the service's watchdog factor */
+       return svcpt->scp_service->srv_watchdog_factor *
+              max_t(int, at, obd_timeout);
+}
index 5db3dfe..b25ee34 100644 (file)
@@ -1190,8 +1190,13 @@ static int qmt_dqacq(const struct lu_env *env, struct lu_device *ld,
        struct ldlm_lock *lock;
        int rtype, qtype;
        int rc, idx, stype;
+       struct obd_device *obd = NULL;
+
        ENTRY;
 
+       if (req->rq_export)
+               obd = req->rq_export->exp_obd;
+
        qbody = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_BODY);
        if (qbody == NULL)
                RETURN(err_serious(-EPROTO));
@@ -1247,7 +1252,9 @@ static int qmt_dqacq(const struct lu_env *env, struct lu_device *ld,
                        timeout_t timeout;
 
                        svc = req->rq_rqbd->rqbd_svcpt;
-                       timeout = at_est2timeout(at_get(&svc->scp_at_estimate));
+                       timeout = at_est2timeout(
+                                       obd_at_get(obd, &svc->scp_at_estimate));
+
                        timeout += (ldlm_bl_timeout(lock) >> 1);
 
                        /* lock is being cancelled, prolong timeout */
index b530943..d6ad898 100644 (file)
@@ -330,9 +330,11 @@ static inline void qsd_set_edquot(struct lquota_entry *lqe, bool edquot)
  * quota space */
 static inline int qsd_wait_timeout(struct qsd_instance *qsd)
 {
+       struct obd_device *obd = qsd->qsd_dev->dd_lu_dev.ld_obd;
+
        if (qsd->qsd_timeout != 0)
                return qsd->qsd_timeout;
-       return min_t(int, at_max / 2, obd_timeout / 2);
+       return min_t(int, obd_get_at_max(obd) / 2, obd_timeout / 2);
 }
 
 /* qsd_entry.c */
index 41518f6..e83a681 100644 (file)
@@ -6027,6 +6027,109 @@ test_73() { #LU-3006
 }
 run_test 73 "failnode to update from mountdata properly"
 
+# LU-15246: per-device at_min/at_max/at_history/ldlm_enqueue_min
+test_74() {
+       (( $MDS1_VERSION >= $(version_code 2.15.57.16) )) ||
+               skip "need MDS version >= 2.15.57.16 for per-device timeouts"
+
+       setup
+       stack_trap "cleanup"
+
+       # Prepare fs2, share the mgs of fs
+       local FSNAME2=fs15246
+       local fs2mdsdev=$(mdsdevname 1_2)
+       local fs2ostdev=$(ostdevname 1_2)
+       local fs2mdsvdev=$(mdsvdevname 1_2)
+       local fs2ostvdev=$(ostvdevname 1_2)
+
+       add fs2mds $(mkfs_opts mds1 $fs2mdsdev) --fsname=$FSNAME2 \
+               --reformat $fs2mdsdev $fs2mdsvdev || error "add fs2mds failed"
+       add fs2ost $(mkfs_opts ost1 $fs2ostdev) --fsname=$FSNAME2 \
+               --reformat $fs2ostdev $fs2ostvdev || error "add fs2ost failed"
+
+       stack_trap "cleanup_fs2"
+
+       start fs2ost $fs2ostdev $OST_MOUNT_OPTS || error "start fs2ost failed"
+       start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS || error "start fs2mds failed"
+
+       mkdir -p $MOUNT2 || error "mkdir $MOUNT2 failed"
+       $MOUNT_CMD $MGSNID:/$FSNAME2 $MOUNT2 || error "mount $MOUNT2 failed"
+
+       echo "========== All mounted lustre fs ===================="
+       mount | grep 'type lustre'
+       echo "====================================================="
+
+       # Set and check osc/ldlm_enqueue_min
+       $LCTL set_param osc.${FSNAME}-*.ldlm_enqueue_min=99 ||
+               error "failed to set per-device adaptive parameters on client"
+       stack_trap "$LCTL set_param osc.${FSNAME}-*.ldlm_enqueue_min=0"
+
+       local ldlm_enqueue_min
+       ldlm_enqueue_min=$($LCTL get_param -n osc.${FSNAME}-*.ldlm_enqueue_min \
+                          | uniq)
+       (( $ldlm_enqueue_min == 99 )) ||
+               error "wrong ldlm_enqueue_min value for osc.${FSNAME}-*"
+
+       # Check fs2 as reference
+       ldlm_enqueue_min=$($LCTL get_param -n osc.${FSNAME2}-*.ldlm_enqueue_min\
+                          | uniq)
+       (( $ldlm_enqueue_min == 0 )) ||
+               error "wrong ldlm_enqueue_min value for osc.${FSNAME2}-*"
+
+       # Set and check obdfilter/at_min
+       do_facet ost1 $LCTL set_param obdfilter.${FSNAME}-*.at_min=1 ||
+               error "failed to set per-device adaptive parameters on ost"
+       stack_trap "do_facet ost1 $LCTL set_param obdfilter.${FSNAME}-*.at_min=0"
+
+       local at_min
+       at_min=$(do_facet ost1 $LCTL get_param -n obdfilter.${FSNAME}-*.at_min \
+               | uniq)
+       (( $at_min == 1 )) ||
+               error "wrong at_min value for obdfilter.${FSNAME}-*"
+
+       # set and check mdc/at_max
+       $LCTL set_param mdc.${FSNAME}-*.at_max=599 ||
+               error "failed to set per-device adaptive parameters on client"
+       stack_trap "$LCTL set_param mdc.${FSNAME}-*.at_max=0"
+
+       local at_max
+       at_max=$($LCTL get_param -n mdc.${FSNAME}-*.at_max | uniq)
+       (( $at_max == 599 )) ||
+               error "wrong at_max value for mdc.${FSNAME}-*"
+
+       # Check fs2 as reference
+       at_max=$($LCTL get_param -n mdc.${FSNAME2}-*.at_max | uniq)
+       (( $at_max == 0 )) ||
+               error "wrong at_max value for mdc.${FSNAME2}-*"
+
+       # Set and check mds/at_max
+       do_facet mds1 $LCTL set_param *.${FSNAME}-*.at_max=599 ||
+               error "failed to set per-device adaptive parameters on mds"
+       stack_trap "do_facet mds1 $LCTL set_param *.${FSNAME}-*.at_max=0"
+
+       # at_max is already declared local above; reuse it for the MDT check
+       at_max=$(do_facet mds1 $LCTL get_param -n mdt.${FSNAME}-*.at_max | uniq)
+       (( $at_max == 599 )) ||
+               error "wrong at_max value for mdt.${FSNAME}-*"
+
+       # Set and check mgs&mgc/at_history
+       local mgs_nid=$(do_facet $SINGLEMDS $LCTL list_nids | tail -1)
+       $LCTL set_param mgc.MGC$mgs_nid.at_history=588
+       stack_trap "$LCTL set_param mgc.MGC$mgs_nid.at_history=0"
+
+       local at_history
+       at_history=$($LCTL get_param -n mgc.MGC$mgs_nid.at_history)
+       (( $at_history == 588 )) ||
+               error "wrong at_history value for mgc.MGC$mgs_nid"
+
+       do_facet mgs $LCTL set_param mgs.MGS.at_history=588
+       stack_trap "do_facet mgs $LCTL set_param mgs.MGS.at_history=0"
+       at_history=$(do_facet mgs $LCTL get_param -n mgs.MGS.at_history)
+       (( $at_history == 588 )) ||
+               error "wrong at_history value for mgs.MGS"
+}
+run_test 74 "Test per-device adaptive timeout parameters"
+
 test_75() { # LU-2374
        [[ "$MDS1_VERSION" -lt $(version_code 2.4.1) ]] &&
                skip "Need MDS version at least 2.4.1"
@@ -6037,6 +6140,7 @@ test_75() { # LU-2374
        local opts_ost="$(mkfs_opts ost1 $(ostdevname 1)) \
                --replace --reformat $(ostdevname 1) $(ostvdevname 1)"
 
+       load_modules
        #check with default parameters
        add mds1 $opts_mds || error "add mds1 failed for default params"
        add ost1 $opts_ost || error "add ost1 failed for default params"