#define OBD_MAX_RIF_DEFAULT 8
#define OBD_MAX_RIF_MAX 512
#define OSC_MAX_RIF_MAX 256
-#define OSC_MAX_DIRTY_DEFAULT (OBD_MAX_RIF_DEFAULT * 4)
+#define OSC_MAX_DIRTY_DEFAULT 2000 /* Arbitrary large value */
#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */
#define OSC_DEFAULT_RESENDS 10
* run-time if a larger observed size is advertised by the MDT. */
__u32 cl_max_mds_easize;
+ /* Data-on-MDT specific value to set a larger reply buffer for possible
+ * data read along with open/stat requests. By default it tries to use
+ * unused space in the reply buffer.
+ * This value is used to ensure that the reply buffer has at least as
+ * much free space as this value indicates. That free space is gained
+ * from the LOV EA buffer, which is small for DoM files and on big
+ * systems can provide up to 32KB of extra space in the reply buffer.
+ * The default value is currently 8KB.
+ */
+ __u32 cl_dom_min_inline_repsize;
+
enum lustre_sec_part cl_sp_me;
enum lustre_sec_part cl_sp_to;
struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */
long cl_reserved_grant;
struct list_head cl_cache_waiters; /* waiting for cache/grant */
time64_t cl_next_shrink_grant; /* seconds */
- struct list_head cl_grant_shrink_list; /* Timeout event list */
+ struct list_head cl_grant_chain;
time64_t cl_grant_shrink_interval; /* seconds */
/* A chunk is an optimal size used by osc_extent to determine
/* just a sum of the loi/lop pending numbers to be exported by /proc */
atomic_t cl_pending_w_pages;
atomic_t cl_pending_r_pages;
- __u32 cl_max_pages_per_rpc;
- __u32 cl_max_rpcs_in_flight;
- __u32 cl_short_io_bytes;
+ u32 cl_max_pages_per_rpc;
+ u32 cl_max_rpcs_in_flight;
+ u32 cl_max_short_io_bytes;
struct obd_histogram cl_read_rpc_hist;
struct obd_histogram cl_write_rpc_hist;
struct obd_histogram cl_read_page_hist;
atomic_t cl_destroy_in_flight;
wait_queue_head_t cl_destroy_waitq;
- struct mdc_rpc_lock *cl_rpc_lock;
-
/* modify rpcs in flight
* currently used for metadata only */
spinlock_t cl_mod_rpcs_lock;
/* ptlrpc work for writeback in ptlrpcd context */
void *cl_writeback_work;
void *cl_lru_work;
+ struct mutex cl_quota_mutex;
/* hash tables for osc_quota_info */
struct cfs_hash *cl_quota_hash[LL_MAXQUOTAS];
+ /* the xid of the request updating the hash tables */
+ __u64 cl_quota_last_xid;
/* Links to the global list of registered changelog devices */
struct list_head cl_chg_dev_linkage;
};
struct rw_semaphore lov_notify_lock;
/* Data-on-MDT: MDC array */
struct lov_md_tgt_desc *lov_mdc_tgts;
+
+ struct kobject *lov_tgts_kobj;
};
struct lmv_tgt_desc {
struct lu_client_fld lmv_fld;
spinlock_t lmv_lock;
struct lmv_desc desc;
- struct proc_dir_entry *targets_proc_entry;
struct mutex lmv_init_mutex;
int connected;
int max_easize;
int max_def_easize;
+ u32 lmv_statfs_start;
- __u32 tgts_size; /* size of tgts array */
+ u32 tgts_size; /* size of tgts array */
struct lmv_tgt_desc **tgts;
struct obd_connect_data conn_data;
+ struct kobject *lmv_tgts_kobj;
+ void *lmv_cache;
};
+/* Minimum sector size is 512 bytes */
+#define MAX_GUARD_NUMBER (PAGE_SIZE / 512)
+
struct niobuf_local {
__u64 lnb_file_offset;
__u32 lnb_page_offset;
int lnb_rc;
struct page *lnb_page;
void *lnb_data;
+ __u16 lnb_guards[MAX_GUARD_NUMBER];
+ __u16 lnb_guard_rpc:1;
+ __u16 lnb_guard_disk:1;
};
struct tgt_thread_big_cache {
* (for /proc/status only!!) */
obd_no_ir:1, /* no imperative recovery. */
obd_process_conf:1, /* device is processing mgs config */
- obd_uses_nid_stats:1, /* maintain per-client OBD stats */
obd_checksum_dump:1; /* dump pages upon cksum error */
/* use separate field as it is set in interrupt to don't mess with
/* Fields used by LProcFS */
struct lprocfs_stats *obd_stats;
- unsigned int obd_cntr_base;
- unsigned int obd_md_cntr_base;
struct lprocfs_stats *obd_md_stats;
struct dentry *obd_debugfs_entry;
struct proc_dir_entry *obd_proc_entry;
struct proc_dir_entry *obd_proc_exports_entry;
- struct proc_dir_entry *obd_svc_procroot;
+ struct dentry *obd_svc_debugfs_entry;
struct lprocfs_stats *obd_svc_stats;
- struct attribute_group obd_attrs_group;
- struct attribute **obd_attrs;
+ const struct attribute **obd_attrs;
struct lprocfs_vars *obd_vars;
atomic_t obd_evict_inprogress;
wait_queue_head_t obd_evict_inprogress_waitq;
/* KEY_SET_INFO in lustre_idl.h */
#define KEY_SPTLRPC_CONF "sptlrpc_conf"
-#define KEY_CACHE_SET "cache_set"
#define KEY_CACHE_LRU_SHRINK "cache_lru_shrink"
#define KEY_OSP_CONNECTED "osp_connected"
+/* Flags for op_xvalid */
+enum op_xvalid {
+ OP_XVALID_CTIME_SET = BIT(0), /* 0x0001 */
+ OP_XVALID_BLOCKS = BIT(1), /* 0x0002 */
+ OP_XVALID_OWNEROVERRIDE = BIT(2), /* 0x0004 */
+ OP_XVALID_FLAGS = BIT(3), /* 0x0008 */
+ OP_XVALID_PROJID = BIT(4), /* 0x0010 */
+ OP_XVALID_LAZYSIZE = BIT(5), /* 0x0020 */
+ OP_XVALID_LAZYBLOCKS = BIT(6), /* 0x0040 */
+};
+
struct lu_context;
static inline int it_to_lock_mode(struct lookup_intent *it)
return LCK_PR;
else if (it->it_op & IT_GETXATTR)
return LCK_PR;
- else if (it->it_op & IT_SETXATTR)
- return LCK_PW;
LASSERTF(0, "Invalid it_op: %d\n", it->it_op);
return -EINVAL;
*/
static inline bool it_has_reply_body(const struct lookup_intent *it)
{
- return it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR);
+ return it->it_op & (IT_OPEN | IT_LOOKUP | IT_GETATTR);
}
struct md_op_data {
struct lu_fid op_fid4; /* to the operation locks. */
u32 op_mds; /* what mds server open will go to */
__u32 op_mode;
- struct lustre_handle op_handle;
+ struct lustre_handle op_open_handle;
s64 op_mod_time;
const char *op_name;
size_t op_namelen;
+ struct rw_semaphore *op_mea1_sem;
+ struct rw_semaphore *op_mea2_sem;
struct lmv_stripe_md *op_mea1;
struct lmv_stripe_md *op_mea2;
__u32 op_suppgids[2];
/* iattr fields and blocks. */
struct iattr op_attr;
+ enum op_xvalid op_xvalid; /* eXtra validity flags */
loff_t op_attr_blocks;
- __u64 op_valid; /* OBD_MD_* */
- unsigned int op_attr_flags; /* LUSTRE_{SYNC,..}_FL */
+ u64 op_valid; /* OBD_MD_* */
+ unsigned int op_attr_flags; /* LUSTRE_{SYNC,..}_FL */
enum md_op_flags op_flags;
__u64 op_data_version;
struct lustre_handle op_lease_handle;
- /* File security context, for creates. */
+ /* File security context, for creates/metadata ops */
const char *op_file_secctx_name;
+ __u32 op_file_secctx_name_size;
void *op_file_secctx;
__u32 op_file_secctx_size;
unsigned int op_max_pages;
__u16 op_mirror_id;
+
+ /*
+ * used to access a migrating dir: if set, assume migration is
+ * finished and use the new layout to access the dir; otherwise use
+ * the old layout. By default it is not set, because new files are
+ * created under the new layout: if a file with the name cannot be
+ * found under either the old or the new layout, we are sure the file
+ * doesn't exist, but checking in the reverse order may race with
+ * creation by others.
+ */
+ bool op_post_migrate;
+ /* used to access a dir with a bad hash */
+ __u32 op_stripe_index;
};
struct md_callback {
int (*o_quotactl)(struct obd_device *, struct obd_export *,
struct obd_quotactl *);
- int (*o_ping)(const struct lu_env *, struct obd_export *exp);
-
/* pools methods */
int (*o_pool_new)(struct obd_device *obd, char *poolname);
int (*o_pool_del)(struct obd_device *obd, char *poolname);
char *ostname);
int (*o_pool_rem)(struct obd_device *obd, char *poolname,
char *ostname);
- void (*o_getref)(struct obd_device *obd);
- void (*o_putref)(struct obd_device *obd);
- /*
- * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
- * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
- * Also, add a wrapper function in include/linux/obd_class.h. */
};
/* lmv structures */
};
struct obd_client_handle {
- struct lustre_handle och_fh;
+ struct lustre_handle och_open_handle;
struct lu_fid och_fid;
struct md_open_data *och_mod;
struct lustre_handle och_lease_handle; /* open lock for lease */
struct cl_attr;
struct md_ops {
- /* Every operation from MD_STATS_FIRST_OP up to and including
- * MD_STATS_LAST_OP will be counted by EXP_MD_OP_INCREMENT()
- * and will appear in /proc/fs/lustre/{lmv,mdc}/.../md_stats.
- * Operations after MD_STATS_LAST_OP are excluded from stats.
- * There are a few reasons for doing this: we prune the 17
- * counters which will be of minimal use in understanding
- * metadata utilization, we save memory by allocating 15
- * instead of 32 counters, we save cycles by not counting.
- *
- * MD_STATS_FIRST_OP must be the first member of md_ops.
- */
-#define MD_STATS_FIRST_OP m_close
int (*m_close)(struct obd_export *, struct md_op_data *,
struct md_open_data *, struct ptlrpc_request **);
int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *,
struct lu_fid *, __u64 *bits);
-#define MD_STATS_LAST_OP m_revalidate_lock
-
int (*m_file_resync)(struct obd_export *, struct md_op_data *);
int (*m_get_root)(struct obd_export *, const char *, struct lu_fid *);
return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT;
}
-/* when RPC size or the max RPCs in flight is increased, the max dirty pages
+/*
+ * When RPC size or the max RPCs in flight is increased, the max dirty pages
* of the client should be increased accordingly to avoid sending fragmented
* RPCs over the network when the client runs out of the maximum dirty space
* when so many RPCs are being generated.
static inline void client_adjust_max_dirty(struct client_obd *cli)
{
/* initializing */
- if (cli->cl_dirty_max_pages <= 0)
- cli->cl_dirty_max_pages = (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024)
- >> PAGE_SHIFT;
- else {
+ if (cli->cl_dirty_max_pages <= 0) {
+ cli->cl_dirty_max_pages =
+ (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) >> PAGE_SHIFT;
+ } else {
unsigned long dirty_max = cli->cl_max_rpcs_in_flight *
cli->cl_max_pages_per_rpc;
if (cli->cl_dirty_max_pages > totalram_pages / 8)
cli->cl_dirty_max_pages = totalram_pages / 8;
+
+ /* This value is exported to userspace through the max_dirty_mb
+ * parameter. So we round up the number of pages to make it a round
+ * number of MBs. */
+ cli->cl_dirty_max_pages = round_up(cli->cl_dirty_max_pages,
+ 1 << (20 - PAGE_SHIFT));
}
#endif /* __OBD_H */