Whamcloud - gitweb
LU-13600 ptlrpc: limit rate of lock replays
[fs/lustre-release.git] / lustre / include / lustre_import.h
index 5ff1f84..c4760d9 100644 (file)
  *
  * @{
  */
-
-#include <lustre_handles.h>
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <libcfs/libcfs.h>
 #include <uapi/linux/lustre/lustre_idl.h>
 
 /**
@@ -59,9 +66,13 @@ struct adaptive_timeout {
        time64_t        at_binstart;         /* bin start time */
        unsigned int    at_hist[AT_BINS];    /* timeout history bins */
        unsigned int    at_flags;
-       unsigned int    at_current;          /* current timeout value */
-       unsigned int    at_worst_ever;       /* worst-ever timeout value */
-       time64_t        at_worst_time;       /* worst-ever timeout timestamp */
+       timeout_t       at_current_timeout;     /* current timeout value */
+       timeout_t       at_worst_timeout_ever;  /* worst-ever timeout delta
+                                                * value
+                                                */
+       time64_t        at_worst_timestamp;     /* worst-ever timeout
+                                                * timestamp
+                                                */
        spinlock_t      at_lock;
 };
 
@@ -100,19 +111,21 @@ enum lustre_imp_state {
         LUSTRE_IMP_RECOVER    = 8,
         LUSTRE_IMP_FULL       = 9,
         LUSTRE_IMP_EVICTED    = 10,
+       LUSTRE_IMP_IDLE       = 11,
+       LUSTRE_IMP_LAST
 };
 
 /** Returns test string representation of numeric import state \a state */
-static inline char * ptlrpc_import_state_name(enum lustre_imp_state state)
+static inline const char *ptlrpc_import_state_name(enum lustre_imp_state state)
 {
-        static char* import_state_names[] = {
-                "<UNKNOWN>", "CLOSED",  "NEW", "DISCONN",
-                "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
-                "RECOVER", "FULL", "EVICTED",
-        };
-
-        LASSERT (state <= LUSTRE_IMP_EVICTED);
-        return import_state_names[state];
+       static const char * const import_state_names[] = {
+               "<UNKNOWN>", "CLOSED",  "NEW", "DISCONN",
+               "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
+               "RECOVER", "FULL", "EVICTED", "IDLE",
+       };
+
+       LASSERT(state < LUSTRE_IMP_LAST);
+       return import_state_names[state];
 }
 
 /**
@@ -156,10 +169,8 @@ struct import_state_hist {
  * Imports are representing client-side view to remote target.
  */
 struct obd_import {
-       /** Local handle (== id) for this import. */
-       struct portals_handle     imp_handle;
        /** Reference counter */
-       atomic_t                  imp_refcount;
+       refcount_t                imp_refcount;
        struct lustre_handle      imp_dlm_handle; /* client's ldlm export */
        /** Currently active connection */
        struct ptlrpc_connection *imp_connection;
@@ -167,8 +178,8 @@ struct obd_import {
         struct ptlrpc_client     *imp_client;
        /** List element for linking into pinger chain */
        struct list_head          imp_pinger_chain;
-       /** List element for linking into chain for destruction */
-       struct list_head          imp_zombie_chain;
+       /** work struct for destruction of import */
+       struct work_struct        imp_zombie_work;
 
         /**
          * Lists of requests that are retained for replay, waiting for a reply,
@@ -204,7 +215,7 @@ struct obd_import {
          * @{
          */
        struct ptlrpc_sec        *imp_sec;
-       struct mutex              imp_sec_mutex;
+       rwlock_t                  imp_sec_lock;
        time64_t                imp_sec_expire;
        pid_t                     imp_sec_refpid;
         /** @} */
@@ -212,12 +223,17 @@ struct obd_import {
        /** Wait queue for those who need to wait for recovery completion */
        wait_queue_head_t         imp_recovery_waitq;
 
+       /** Number of requests allocated */
+       atomic_t                  imp_reqs;
        /** Number of requests currently in-flight */
        atomic_t                  imp_inflight;
        /** Number of requests currently unregistering */
        atomic_t                  imp_unregistering;
        /** Number of replay requests inflight */
        atomic_t                  imp_replay_inflight;
+       /** In-flight replays rate control */
+       wait_queue_head_t         imp_replay_waitq;
+
        /** Number of currently happening import invalidations */
        atomic_t                  imp_inval_count;
        /** Numbner of request timeouts */
@@ -231,6 +247,8 @@ struct obd_import {
         int                       imp_state_hist_idx;
         /** Current import generation. Incremented on every reconnect */
         int                       imp_generation;
+       /** Idle connection initiated at this generation */
+       int                       imp_initiated_at;
         /** Incremented every time we send reconnection request */
         __u32                     imp_conn_cnt;
        /** 
@@ -270,8 +288,7 @@ struct obd_import {
        spinlock_t                imp_lock;
 
        /* flags */
-       unsigned long             imp_no_timeout:1, /* timeouts are disabled */
-                                 imp_invalid:1,    /* evicted */
+       unsigned long             imp_invalid:1,    /* evicted */
                                  /* administratively disabled */
                                  imp_deactive:1,
                                  /* try to recover the import */
@@ -294,22 +311,24 @@ struct obd_import {
                                  imp_resend_replay:1,
                                  /* disable normal recovery, for test only. */
                                  imp_no_pinger_recover:1,
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
-                                 /* need IR MNE swab */
-                                 imp_need_mne_swab:1,
-#endif
                                  /* import must be reconnected instead of
                                   * chouse new connection */
                                  imp_force_reconnect:1,
                                  /* import has tried to connect with server */
                                  imp_connect_tried:1,
                                  /* connected but not FULL yet */
-                                 imp_connected:1;
-       __u32                     imp_connect_op;
-       struct obd_connect_data   imp_connect_data;
-       __u64                     imp_connect_flags_orig;
-       __u64                     imp_connect_flags2_orig;
-       int                       imp_connect_error;
+                                 imp_connected:1,
+                                 /* grant shrink disabled */
+                                 imp_grant_shrink_disabled:1,
+                                 /* to suppress LCONSOLE() at conn.restore */
+                                 imp_was_idle:1;
+       u32                       imp_connect_op;
+       u32                       imp_idle_timeout;
+       u32                       imp_idle_debug;
+       struct obd_connect_data   imp_connect_data;
+       __u64                     imp_connect_flags_orig;
+       __u64                     imp_connect_flags2_orig;
+       int                       imp_connect_error;
 
        enum lustre_msg_magic   imp_msg_magic;
                                /* adjusted based on server capability */
@@ -320,64 +339,83 @@ struct obd_import {
        time64_t                imp_last_reply_time;    /* for health check */
 };
 
-/* import.c */
-static inline unsigned int at_est2timeout(unsigned int val)
+/* import.c : adaptive timeout handling.
+ *
+ * Lustre tracks how long RPCs take to complete. This information is reported
+ * back to clients who utilize the information to estimate the time needed
+ * for future requests and set appropriate RPC timeouts. Minimum and maximum
+ * service times can be configured via the at_min and at_max kernel module
+ * parameters, respectively.
+ *
+ * Since this information is transmitted between nodes, the timeouts are in
+ * seconds, not jiffies, whose length can vary from node to node. To avoid
+ * confusion, the timeout is handled in timeout_t (s32) instead of time64_t
+ * or long (jiffies).
+ */
+static inline timeout_t at_est2timeout(timeout_t timeout)
 {
-        /* add an arbitrary minimum: 125% +5 sec */
-        return (val + (val >> 2) + 5);
+       /* pad the estimate with an arbitrary margin: 125% + 5 sec */
+       return timeout + (timeout >> 2) + 5;
 }
 
-static inline unsigned int at_timeout2est(unsigned int val)
+static inline timeout_t at_timeout2est(timeout_t timeout)
 {
-        /* restore estimate value from timeout: e=4/5(t-5) */
-        LASSERT(val);
-        return (max((val << 2) / 5, 5U) - 4);
+       /* restore estimate value from timeout: e=4/5(t-5) */
+       LASSERT(timeout > 0);
+       return max((timeout << 2) / 5, 5) - 4;
 }
 
-static inline void at_reset_nolock(struct adaptive_timeout *at, int val)
+static inline void at_reset_nolock(struct adaptive_timeout *at,
+                                  timeout_t timeout)
 {
-        at->at_current = val;
-        at->at_worst_ever = val;
-       at->at_worst_time = ktime_get_real_seconds();
+       at->at_current_timeout = timeout;
+       at->at_worst_timeout_ever = timeout;
+       at->at_worst_timestamp = ktime_get_real_seconds();
 }
 
-static inline void at_reset(struct adaptive_timeout *at, int val)
+static inline void at_reset(struct adaptive_timeout *at, timeout_t timeout)
 {
        spin_lock(&at->at_lock);
-       at_reset_nolock(at, val);
+       at_reset_nolock(at, timeout);
        spin_unlock(&at->at_lock);
 }
 
-static inline void at_init(struct adaptive_timeout *at, int val, int flags) {
+static inline void at_init(struct adaptive_timeout *at, timeout_t timeout,
+                          int flags)
+{
        memset(at, 0, sizeof(*at));
        spin_lock_init(&at->at_lock);
        at->at_flags = flags;
-       at_reset(at, val);
+       at_reset(at, timeout);
 }
 
-static inline void at_reinit(struct adaptive_timeout *at, int val, int flags)
+static inline void at_reinit(struct adaptive_timeout *at, timeout_t timeout,
+                            int flags)
 {
        spin_lock(&at->at_lock);
        at->at_binstart = 0;
        memset(at->at_hist, 0, sizeof(at->at_hist));
        at->at_flags = flags;
-       at_reset_nolock(at, val);
+       at_reset_nolock(at, timeout);
        spin_unlock(&at->at_lock);
 }
 
 extern unsigned int at_min;
-static inline int at_get(struct adaptive_timeout *at) {
-        return (at->at_current > at_min) ? at->at_current : at_min;
-}
-int at_measured(struct adaptive_timeout *at, unsigned int val);
-int import_at_get_index(struct obd_import *imp, int portal);
 extern unsigned int at_max;
 #define AT_OFF (at_max == 0)
 
+static inline timeout_t at_get(struct adaptive_timeout *at)
+{
+       return (at->at_current_timeout > at_min) ?
+               at->at_current_timeout : at_min;
+}
+
+timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout);
+int import_at_get_index(struct obd_import *imp, int portal);
+
 /* genops.c */
 struct obd_export;
 extern struct obd_import *class_exp2cliimp(struct obd_export *);
-extern struct obd_import *class_conn2cliimp(struct lustre_handle *);
 
 /** @} import */