LU-8130 lov: convert lo[v|d]_pool to use rhashtable

[fs/lustre-release.git] / lustre / include / obd.h
diff --git a/lustre/include/obd.h b/lustre/include/obd.h

index b35b3fa..8461df7 100644 (file)
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -87,6 +87,8 @@ typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
  struct obd_info {
         /* OBD_STATFS_* flags */
         __u64                   oi_flags;
+       struct obd_device      *oi_obd;
+       struct lu_tgt_desc     *oi_tgt;
          /* statfs data specific for every OSC, if needed at all. */
          struct obd_statfs      *oi_osfs;
          /* An update callback which is called to update some data on upper
@@ -97,21 +99,18 @@ struct obd_info {
  };
  
  struct obd_type {
-       struct list_head         typ_chain;
-       struct obd_ops          *typ_dt_ops;
-       struct md_ops           *typ_md_ops;
+       const struct obd_ops    *typ_dt_ops;
+       const struct md_ops     *typ_md_ops;
         struct proc_dir_entry   *typ_procroot;
-       struct proc_dir_entry   *typ_procsym;
         struct dentry           *typ_debugfs_entry;
  #ifdef HAVE_SERVER_SUPPORT
         bool                     typ_sym_filter;
  #endif
-       char                    *typ_name;
-       int                      typ_refcnt;
+       atomic_t                 typ_refcnt;
         struct lu_device_type   *typ_lu;
-       spinlock_t               obd_type_lock;
-       struct kobject          *typ_kobj;
+       struct kobject           typ_kobj;
  };
+#define typ_name typ_kobj.name
  
  struct brw_page {
         u64              off;
@@ -136,12 +135,11 @@ struct timeout_item {
  #define OSC_MAX_DIRTY_MB_MAX   2048     /* arbitrary, but < MAX_LONG bytes */
  #define OSC_DEFAULT_RESENDS    10
  
-/* possible values for fo_sync_lock_cancel */
-enum {
-        NEVER_SYNC_ON_CANCEL = 0,
-        BLOCKING_SYNC_ON_CANCEL = 1,
-        ALWAYS_SYNC_ON_CANCEL = 2,
-        NUM_SYNC_ON_CANCEL_STATES
+/* possible values for lut_sync_lock_cancel */
+enum tgt_sync_lock_cancel {
+       SYNC_LOCK_CANCEL_NEVER    = 0,
+       SYNC_LOCK_CANCEL_BLOCKING = 1,
+       SYNC_LOCK_CANCEL_ALWAYS   = 2,
  };
  
  /*
@@ -151,7 +149,15 @@ enum {
   * vmalloc(). Excessive use of vmalloc() may cause spinlock contention
   * on the MDS.
   */
-#define OBD_MAX_DEFAULT_EA_SIZE                4096
+#define OBD_MAX_DEFAULT_EA_SIZE        4096
+
+/*
+ * Lustre can handle larger xattrs internally, but we must respect the Linux
+ * VFS limitation or tools like tar cannot interact with Lustre volumes
+ * correctly.
+ */
+#define OBD_MAX_EA_SIZE                XATTR_SIZE_MAX
+
  
  enum obd_cl_sem_lock_class {
         OBD_CLI_SEM_NORMAL,
@@ -205,7 +211,6 @@ struct client_obd {
         /* the grant values are protected by loi_list_lock below */
         unsigned long            cl_dirty_pages;      /* all _dirty_ in pages */
         unsigned long            cl_dirty_max_pages;  /* allowed w/o rpc */
-       unsigned long            cl_dirty_transit;    /* dirty synchronous */
         unsigned long            cl_avail_grant;   /* bytes of credit for ost */
         unsigned long            cl_lost_grant;    /* lost credits (trunc) */
         /* grant consumed for dirty pages */
@@ -257,9 +262,9 @@ struct client_obd {
         /* just a sum of the loi/lop pending numbers to be exported by /proc */
         atomic_t                cl_pending_w_pages;
         atomic_t                cl_pending_r_pages;
-       __u32                   cl_max_pages_per_rpc;
-       __u32                   cl_max_rpcs_in_flight;
-       __u32                   cl_short_io_bytes;
+       u32                     cl_max_pages_per_rpc;
+       u32                     cl_max_rpcs_in_flight;
+       u32                     cl_max_short_io_bytes;
         struct obd_histogram    cl_read_rpc_hist;
         struct obd_histogram    cl_write_rpc_hist;
         struct obd_histogram    cl_read_page_hist;
@@ -333,6 +338,8 @@ struct client_obd {
          __u32                    cl_supp_cksum_types;
          /* checksum algorithm to be used */
         enum cksum_types         cl_cksum_type;
+       /* preferred checksum algorithm to be used */
+       enum cksum_types         cl_preferred_cksum_type;
  
          /* also protected by the poorly named _loi_list_lock lock above */
          struct osc_async_rc      cl_ar;
@@ -346,8 +353,11 @@ struct client_obd {
         /* ptlrpc work for writeback in ptlrpcd context */
         void                    *cl_writeback_work;
         void                    *cl_lru_work;
+       struct mutex              cl_quota_mutex;
         /* hash tables for osc_quota_info */
         struct cfs_hash         *cl_quota_hash[LL_MAXQUOTAS];
+       /* the xid of the request updating the hash tables */
+       __u64                    cl_quota_last_xid;
         /* Links to the global list of registered changelog devices */
         struct list_head         cl_chg_dev_linkage;
  };
@@ -366,29 +376,12 @@ struct echo_client_obd {
         __u64                   ec_unique;
  };
  
-/* Generic subset of OSTs */
-struct ost_pool {
-        __u32              *op_array;      /* array of index of
-                                                   lov_obd->lov_tgts */
-        unsigned int        op_count;      /* number of OSTs in the array */
-        unsigned int        op_size;       /* allocated size of lp_array */
-       struct rw_semaphore op_rw_sem;     /* to protect ost_pool use */
-};
-
  /* allow statfs data caching for 1 second */
  #define OBD_STATFS_CACHE_SECONDS 1
+/* arbitrary maximum. larger would be useless, allows catching bogus input */
+#define OBD_STATFS_CACHE_MAX_AGE 3600 /* seconds */
  
-struct lov_tgt_desc {
-       struct list_head    ltd_kill;
-        struct obd_uuid     ltd_uuid;
-        struct obd_device  *ltd_obd;
-        struct obd_export  *ltd_exp;
-        __u32               ltd_gen;
-        __u32               ltd_index;   /* index in lov_obd->tgts */
-        unsigned long       ltd_active:1,/* is this target up for requests */
-                            ltd_activate:1,/* should  target be activated */
-                            ltd_reap:1;  /* should this target be deleted */
-};
+#define lov_tgt_desc lu_tgt_desc
  
  struct lov_md_tgt_desc {
         struct obd_device *lmtd_mdc;
@@ -398,7 +391,7 @@ struct lov_md_tgt_desc {
  struct lov_obd {
         struct lov_desc         desc;
         struct lov_tgt_desc   **lov_tgts;               /* sparse array */
-       struct ost_pool         lov_packed;             /* all OSTs in a packed
+       struct lu_tgt_pool      lov_packed;             /* all OSTs in a packed
                                                            array */
         struct mutex            lov_lock;
         struct obd_connect_data lov_ocd;
@@ -408,7 +401,7 @@ struct lov_obd {
         __u32                   lov_tgt_size;   /* size of tgts array */
         int                     lov_connects;
         int                     lov_pool_count;
-       struct cfs_hash        *lov_pools_hash_body; /* used for key access */
+       struct rhashtable       lov_pools_hash_body; /* used for key access */
         struct list_head        lov_pool_list;  /* used for sequential access */
         struct proc_dir_entry  *lov_pool_proc_entry;
         enum lustre_sec_part    lov_sp_me;
@@ -423,34 +416,32 @@ struct lov_obd {
         struct kobject          *lov_tgts_kobj;
  };
  
-struct lmv_tgt_desc {
-       struct obd_uuid         ltd_uuid;
-       struct obd_device       *ltd_obd;
-       struct obd_export       *ltd_exp;
-       __u32                   ltd_idx;
-       struct mutex            ltd_fid_mutex;
-       unsigned long           ltd_active:1; /* target up for requests */
-};
+#define lmv_tgt_desc lu_tgt_desc
  
  struct lmv_obd {
         struct lu_client_fld    lmv_fld;
         spinlock_t              lmv_lock;
-       struct lmv_desc         desc;
  
-       struct mutex            lmv_init_mutex;
         int                     connected;
         int                     max_easize;
         int                     max_def_easize;
+       u32                     lmv_statfs_start;
  
-       __u32                   tgts_size; /* size of tgts array */
-       struct lmv_tgt_desc     **tgts;
-       int                     lmv_statfs_start;
-
+       struct lu_tgt_descs     lmv_mdt_descs;
  
         struct obd_connect_data conn_data;
         struct kobject          *lmv_tgts_kobj;
+       void                    *lmv_cache;
+
+       __u32                   lmv_qos_rr_index;
  };
  
+#define lmv_mdt_count  lmv_mdt_descs.ltd_lmv_desc.ld_tgt_count
+#define lmv_qos                lmv_mdt_descs.ltd_qos
+
+/* Minimum sector size is 512 */
+#define MAX_GUARD_NUMBER (PAGE_SIZE / 512)
+
  struct niobuf_local {
         __u64           lnb_file_offset;
         __u32           lnb_page_offset;
@@ -459,6 +450,11 @@ struct niobuf_local {
         int             lnb_rc;
         struct page     *lnb_page;
         void            *lnb_data;
+       __u16           lnb_guards[MAX_GUARD_NUMBER];
+       __u16           lnb_guard_rpc:1;
+       __u16           lnb_guard_disk:1;
+       /* separate unlock for read path to allow shared access */
+       __u16           lnb_locked:1;
  };
  
  struct tgt_thread_big_cache {
@@ -591,7 +587,6 @@ struct obd_llog_group {
         struct llog_ctxt   *olg_ctxts[LLOG_MAX_CTXTS];
         wait_queue_head_t  olg_waitq;
         spinlock_t         olg_lock;
-       struct mutex       olg_cat_processing;
  };
  
  /* corresponds to one of the obd's */
@@ -632,7 +627,7 @@ struct obd_device {
           * protection of other bits using _bh lock */
          unsigned long obd_recovery_expired:1;
          /* uuid-export hash body */
-       struct cfs_hash             *obd_uuid_hash;
+       struct rhashtable               obd_uuid_hash;
          /* nid-export hash body */
         struct cfs_hash             *obd_nid_hash;
         /* nid stats body */
@@ -667,7 +662,7 @@ struct obd_device {
         struct list_head        obd_exports_timed;
         time64_t                obd_eviction_timer;     /* for ping evictor */
  
-       int                     obd_max_recoverable_clients;
+       atomic_t                obd_max_recoverable_clients;
         atomic_t                obd_connected_clients;
         int                     obd_stale_clients;
          /* this lock protects all recovery list_heads, timer and
@@ -678,13 +673,16 @@ struct obd_device {
         int                     obd_requests_queued_for_recovery;
         wait_queue_head_t       obd_next_transno_waitq;
         /* protected by obd_recovery_task_lock */
-       struct timer_list       obd_recovery_timer;
+       struct hrtimer          obd_recovery_timer;
         /* seconds */
         time64_t                obd_recovery_start;
         /* seconds, for lprocfs_status */
         time64_t                obd_recovery_end;
-       time64_t                obd_recovery_time_hard;
-       time64_t                obd_recovery_timeout;
+       /* To distinguish timeouts from time stamps, Lustre declares
+        * timeouts as time_t instead of time64_t.
+        */
+       time_t                  obd_recovery_time_hard;
+       time_t                  obd_recovery_timeout;
         int                     obd_recovery_ir_factor;
  
         /* new recovery stuff from CMD2 */
@@ -745,6 +743,13 @@ struct obd_device {
         struct completion               obd_kobj_unregister;
  };
  
+int obd_uuid_add(struct obd_device *obd, struct obd_export *export);
+void obd_uuid_del(struct obd_device *obd, struct obd_export *export);
+#ifdef HAVE_SERVER_SUPPORT
+struct obd_export *obd_uuid_lookup(struct obd_device *obd,
+                                  struct obd_uuid *uuid);
+#endif
+
  /* get/set_info keys */
  #define KEY_ASYNC               "async"
  #define KEY_CHANGELOG_CLEAR     "changelog_clear"
@@ -771,7 +776,6 @@ struct obd_device {
  /*      KEY_SET_INFO in lustre_idl.h */
  #define KEY_SPTLRPC_CONF        "sptlrpc_conf"
  
-#define KEY_CACHE_SET          "cache_set"
  #define KEY_CACHE_LRU_SHRINK   "cache_lru_shrink"
  #define KEY_OSP_CONNECTED      "osp_connected"
  
@@ -820,6 +824,15 @@ enum md_cli_flags {
         CLI_HASH64      = 1 << 2,
         CLI_API32       = 1 << 3,
         CLI_MIGRATE     = 1 << 4,
+       CLI_DIRTY_DATA  = 1 << 5,
+};
+
+enum md_op_code {
+       LUSTRE_OPC_MKDIR        = 0,
+       LUSTRE_OPC_SYMLINK      = 1,
+       LUSTRE_OPC_MKNOD        = 2,
+       LUSTRE_OPC_CREATE       = 3,
+       LUSTRE_OPC_ANY          = 5,
  };
  
  /**
@@ -839,12 +852,16 @@ struct md_op_data {
         struct lu_fid           op_fid4; /* to the operation locks. */
         u32                     op_mds;  /* what mds server open will go to */
         __u32                   op_mode;
+       enum md_op_code         op_code;
         struct lustre_handle    op_open_handle;
         s64                     op_mod_time;
         const char              *op_name;
         size_t                  op_namelen;
+       struct rw_semaphore     *op_mea1_sem;
+       struct rw_semaphore     *op_mea2_sem;
         struct lmv_stripe_md    *op_mea1;
         struct lmv_stripe_md    *op_mea2;
+       struct lmv_stripe_md    *op_default_mea1;       /* default LMV */
         __u32                   op_suppgids[2];
         __u32                   op_fsuid;
         __u32                   op_fsgid;
@@ -872,14 +889,12 @@ struct md_op_data {
         __u64                   op_data_version;
         struct lustre_handle    op_lease_handle;
  
-       /* File security context, for creates. */
+       /* File security context, for creates/metadata ops */
         const char             *op_file_secctx_name;
+       __u32                   op_file_secctx_name_size;
         void                   *op_file_secctx;
         __u32                   op_file_secctx_size;
  
-       /* default stripe offset */
-       __u32                   op_default_stripe_offset;
-
         __u32                   op_projid;
  
         /* Used by readdir */
@@ -898,6 +913,8 @@ struct md_op_data {
         bool                    op_post_migrate;
         /* used to access dir with bad hash */
         __u32                   op_stripe_index;
+       /* Archive ID for PCC attach */
+       __u32                   op_archive_id;
  };
  
  struct md_callback {
@@ -1005,8 +1022,6 @@ struct obd_ops {
         int (*o_quotactl)(struct obd_device *, struct obd_export *,
                           struct obd_quotactl *);
  
-       int (*o_ping)(const struct lu_env *, struct obd_export *exp);
-
         /* pools methods */
         int (*o_pool_new)(struct obd_device *obd, char *poolname);
         int (*o_pool_del)(struct obd_device *obd, char *poolname);
@@ -1020,8 +1035,12 @@ struct obd_ops {
  struct lustre_md {
         struct mdt_body         *body;
         struct lu_buf            layout;
-       struct lmv_stripe_md    *lmv;
-#ifdef CONFIG_FS_POSIX_ACL
+       union {
+               struct lmv_stripe_md    *lmv;
+               struct lmv_foreign_md   *lfm;
+       };
+       struct lmv_stripe_md    *default_lmv;
+#ifdef CONFIG_LUSTRE_FS_POSIX_ACL
         struct posix_acl        *posix_acl;
  #endif
  };
@@ -1146,6 +1165,8 @@ struct md_ops {
                                   struct lu_fid *fid);
         int (*m_unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm,
                           const union lmv_mds_md *lmv, size_t lmv_size);
+       int (*m_rmfid)(struct obd_export *exp, struct fid_array *fa, int *rcs,
+                      struct ptlrpc_request_set *set);
  };
  
  static inline struct md_open_data *obd_mod_alloc(void)
@@ -1260,8 +1281,8 @@ static inline void client_adjust_max_dirty(struct client_obd *cli)
                         cli->cl_dirty_max_pages = dirty_max;
         }
  
-       if (cli->cl_dirty_max_pages > totalram_pages / 8)
-               cli->cl_dirty_max_pages = totalram_pages / 8;
+       if (cli->cl_dirty_max_pages > cfs_totalram_pages() / 8)
+               cli->cl_dirty_max_pages = cfs_totalram_pages() / 8;
  
         /* This value is exported to userspace through the max_dirty_mb
          * parameter.  So we round up the number of pages to make it a round