LU-1330 obdclass: add obd_target.h

[fs/lustre-release.git] / lustre / include / lustre_net.h
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h

index 9746c27..724fe0e 100644 (file)
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -27,7 +27,7 @@
   * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
   * Use is subject to license terms.
   *
- * Copyright (c) 2010, 2012, Intel Corporation.
+ * Copyright (c) 2010, 2013, Intel Corporation.
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
@@ -108,13 +108,13 @@
   */
  #define PTLRPC_MAX_BRW_BITS    (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS)
  #define PTLRPC_MAX_BRW_SIZE    (1 << PTLRPC_MAX_BRW_BITS)
-#define PTLRPC_MAX_BRW_PAGES   (PTLRPC_MAX_BRW_SIZE >> CFS_PAGE_SHIFT)
+#define PTLRPC_MAX_BRW_PAGES   (PTLRPC_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
  
  #define ONE_MB_BRW_SIZE                (1 << LNET_MTU_BITS)
  #define MD_MAX_BRW_SIZE                (1 << LNET_MTU_BITS)
-#define MD_MAX_BRW_PAGES       (MD_MAX_BRW_SIZE >> CFS_PAGE_SHIFT)
+#define MD_MAX_BRW_PAGES       (MD_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
  #define DT_MAX_BRW_SIZE                PTLRPC_MAX_BRW_SIZE
-#define DT_MAX_BRW_PAGES       (DT_MAX_BRW_SIZE >> CFS_PAGE_SHIFT)
+#define DT_MAX_BRW_PAGES       (DT_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
  #define OFD_MAX_BRW_SIZE       (1 << LNET_MTU_BITS)
  
  /* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
@@ -122,8 +122,8 @@
  # if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
  #  error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
  # endif
-# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * CFS_PAGE_SIZE))
-#  error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * CFS_PAGE_SIZE"
+# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE))
+#  error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE"
  # endif
  # if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT)
  #  error "PTLRPC_MAX_BRW_SIZE too big"
@@ -465,7 +465,7 @@
    */
   /* depress threads factor for VM with small memory size */
  #define OSS_THR_FACTOR         min_t(int, 8, \
-                               CFS_NUM_CACHEPAGES >> (28 - CFS_PAGE_SHIFT))
+                               NUM_CACHEPAGES >> (28 - PAGE_CACHE_SHIFT))
  #define OSS_NTHRS_INIT         (PTLRPC_NTHRS_INIT + 1)
  #define OSS_NTHRS_BASE         64
  #define OSS_NTHRS_MAX          512
@@ -787,6 +787,18 @@ enum ptlrpc_nrs_ctl {
  };
  
  /**
+ * ORR policy operations
+ */
+enum nrs_ctl_orr {
+       NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
+       NRS_CTL_ORR_WR_QUANTUM,
+       NRS_CTL_ORR_RD_OFF_TYPE,
+       NRS_CTL_ORR_WR_OFF_TYPE,
+       NRS_CTL_ORR_RD_SUPP_REQ,
+       NRS_CTL_ORR_WR_SUPP_REQ,
+};
+
+/**
   * NRS policy operations.
   *
   * These determine the behaviour of a policy, and are called in response to
@@ -1413,6 +1425,276 @@ struct nrs_fifo_req {
  /** @} fifo */
  
  /**
+ * \name CRR-N
+ *
+ * CRR-N, Client Round Robin over NIDs
+ * @{
+ */
+
+/**
+ * private data structure for CRR-N NRS
+ */
+struct nrs_crrn_net {
+       struct ptlrpc_nrs_resource      cn_res;
+       cfs_binheap_t                  *cn_binheap;
+       cfs_hash_t                     *cn_cli_hash;
+       /**
+        * Used when a new scheduling round commences, in order to synchronize
+        * all clients with the new round number.
+        */
+       __u64                           cn_round;
+       /**
+        * Determines the relevant ordering amongst request batches within a
+        * scheduling round.
+        */
+       __u64                           cn_sequence;
+       /**
+        * Round Robin quantum; the maximum number of RPCs that each request
+        * batch for each client can have in a scheduling round.
+        */
+       __u16                           cn_quantum;
+};
+
+/**
+ * Object representing a client in CRR-N, as identified by its NID
+ */
+struct nrs_crrn_client {
+       struct ptlrpc_nrs_resource      cc_res;
+       cfs_hlist_node_t                cc_hnode;
+       lnet_nid_t                      cc_nid;
+       /**
+        * The round number against which this client is currently scheduling
+        * requests.
+        */
+       __u64                           cc_round;
+       /**
+        * The sequence number used for requests scheduled by this client during
+        * the current round number.
+        */
+       __u64                           cc_sequence;
+       cfs_atomic_t                    cc_ref;
+       /**
+        * Round Robin quantum; the maximum number of RPCs the client is allowed
+        * to schedule in a single batch of each round.
+        */
+       __u16                           cc_quantum;
+       /**
+        * # of pending requests for this client, on all existing rounds
+        */
+       __u16                           cc_active;
+};
+
+/**
+ * CRR-N NRS request definition
+ */
+struct nrs_crrn_req {
+       /**
+        * Round number for this request; shared with all other requests in the
+        * same batch.
+        */
+       __u64                   cr_round;
+       /**
+        * Sequence number for this request; shared with all other requests in
+        * the same batch.
+        */
+       __u64                   cr_sequence;
+};
+
+/**
+ * CRR-N policy operations.
+ */
+enum nrs_ctl_crr {
+       /**
+        * Read the RR quantum size of a CRR-N policy.
+        */
+       NRS_CTL_CRRN_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
+       /**
+        * Write the RR quantum size of a CRR-N policy.
+        */
+       NRS_CTL_CRRN_WR_QUANTUM,
+};
+
+/** @} CRR-N */
+
+/**
+ * \name ORR/TRR
+ *
+ * ORR/TRR (Object-based Round Robin/Target-based Round Robin) NRS policies
+ * @{
+ */
+
+/**
+ * Lower and upper byte offsets of a brw RPC
+ */
+struct nrs_orr_req_range {
+       __u64           or_start;
+       __u64           or_end;
+};
+
+/**
+ * RPC types supported by the ORR/TRR policies
+ */
+enum nrs_orr_supp {
+       NOS_OST_READ  = (1 << 0),
+       NOS_OST_WRITE = (1 << 1),
+       NOS_OST_RW    = (NOS_OST_READ | NOS_OST_WRITE),
+       /**
+        * Default value for policies.
+        */
+       NOS_DFLT      = NOS_OST_READ
+};
+
+/**
+ * As unique keys for grouping RPCs together, we use the object's OST FID for
+ * the ORR policy, and the OST index for the TRR policy.
+ *
+ * XXX: We waste some space for TRR policy instances by using a union, but it
+ *     allows to consolidate some of the code between ORR and TRR, and these
+ *     policies will probably eventually merge into one anyway.
+ */
+struct nrs_orr_key {
+       union {
+               /** object FID for ORR */
+               struct lu_fid   ok_fid;
+               /** OST index for TRR */
+               __u32           ok_idx;
+       };
+};
+
+/**
+ * The largest base string for unique hash/slab object names is
+ * "nrs_orr_reg_", so 13 characters. We add 3 to this to be used for the CPT
+ * id number, so this _should_ be more than enough for the maximum number of
+ * CPTs on any system. If it does happen that this statement is incorrect,
+ * nrs_orr_genobjname() will inevitably yield a non-unique name and cause
+ * kmem_cache_create() to complain (on Linux), so the erroneous situation
+ * will hopefully not go unnoticed.
+ */
+#define NRS_ORR_OBJ_NAME_MAX   (sizeof("nrs_orr_reg_") + 3)
+
+/**
+ * private data structure for ORR and TRR NRS
+ */
+struct nrs_orr_data {
+       struct ptlrpc_nrs_resource      od_res;
+       cfs_binheap_t                  *od_binheap;
+       cfs_hash_t                     *od_obj_hash;
+       struct kmem_cache                      *od_cache;
+       /**
+        * Used when a new scheduling round commences, in order to synchronize
+        * all object or OST batches with the new round number.
+        */
+       __u64                           od_round;
+       /**
+        * Determines the relevant ordering amongst request batches within a
+        * scheduling round.
+        */
+       __u64                           od_sequence;
+       /**
+        * RPC types that are currently supported.
+        */
+       enum nrs_orr_supp               od_supp;
+       /**
+        * Round Robin quantum; the maxium number of RPCs that each request
+        * batch for each object or OST can have in a scheduling round.
+        */
+       __u16                           od_quantum;
+       /**
+        * Whether to use physical disk offsets or logical file offsets.
+        */
+       bool                            od_physical;
+       /**
+        * XXX: We need to provide a persistently allocated string to hold
+        * unique object names for this policy, since in currently supported
+        * versions of Linux by Lustre, kmem_cache_create() just sets a pointer
+        * to the name string provided. kstrdup() is used in the version of
+        * kmeme_cache_create() in current Linux mainline, so we may be able to
+        * remove this in the future.
+        */
+       char                            od_objname[NRS_ORR_OBJ_NAME_MAX];
+};
+
+/**
+ * Represents a backend-fs object or OST in the ORR and TRR policies
+ * respectively
+ */
+struct nrs_orr_object {
+       struct ptlrpc_nrs_resource      oo_res;
+       cfs_hlist_node_t                oo_hnode;
+       /**
+        * The round number against which requests are being scheduled for this
+        * object or OST
+        */
+       __u64                           oo_round;
+       /**
+        * The sequence number used for requests scheduled for this object or
+        * OST during the current round number.
+        */
+       __u64                           oo_sequence;
+       /**
+        * The key of the object or OST for which this structure instance is
+        * scheduling RPCs
+        */
+       struct nrs_orr_key              oo_key;
+       cfs_atomic_t                    oo_ref;
+       /**
+        * Round Robin quantum; the maximum number of RPCs that are allowed to
+        * be scheduled for the object or OST in a single batch of each round.
+        */
+       __u16                           oo_quantum;
+       /**
+        * # of pending requests for this object or OST, on all existing rounds
+        */
+       __u16                           oo_active;
+};
+
+/**
+ * ORR/TRR NRS request definition
+ */
+struct nrs_orr_req {
+       /**
+        * The offset range this request covers
+        */
+       struct nrs_orr_req_range        or_range;
+       /**
+        * Round number for this request; shared with all other requests in the
+        * same batch.
+        */
+       __u64                           or_round;
+       /**
+        * Sequence number for this request; shared with all other requests in
+        * the same batch.
+        */
+       __u64                           or_sequence;
+       /**
+        * For debugging purposes.
+        */
+       struct nrs_orr_key              or_key;
+       /**
+        * An ORR policy instance has filled in request information while
+        * enqueueing the request on the service partition's regular NRS head.
+        */
+       unsigned int                    or_orr_set:1;
+       /**
+        * A TRR policy instance has filled in request information while
+        * enqueueing the request on the service partition's regular NRS head.
+        */
+       unsigned int                    or_trr_set:1;
+       /**
+        * Request offset ranges have been filled in with logical offset
+        * values.
+        */
+       unsigned int                    or_logical_set:1;
+       /**
+        * Request offset ranges have been filled in with physical offset
+        * values.
+        */
+       unsigned int                    or_physical_set:1;
+};
+
+/** @} ORR/TRR */
+
+/**
   * NRS request
   *
   * Instances of this object exist embedded within ptlrpc_request; the main
@@ -1447,6 +1729,12 @@ struct ptlrpc_nrs_request {
                  * Fields for the FIFO policy
                  */
                 struct nrs_fifo_req     fifo;
+               /**
+                * CRR-N request defintion
+                */
+               struct nrs_crrn_req     crr;
+               /** ORR and TRR share the same request definition */
+               struct nrs_orr_req      orr;
         } nr_u;
         /**
          * Externally-registering policies may want to use this to allocate
@@ -2414,7 +2702,7 @@ struct ptlrpcd_ctl {
           */
          struct ptlrpc_request_set  *pc_set;
          /**
-         * Thread name used in cfs_daemonize()
+        * Thread name used in kthread_run()
           */
          char                        pc_name[16];
          /**
@@ -2695,16 +2983,16 @@ static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
         __ptlrpc_free_bulk(bulk, 0);
  }
  void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
-                            cfs_page_t *page, int pageoffset, int len, int);
+                            struct page *page, int pageoffset, int len, int);
  static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
-                                            cfs_page_t *page, int pageoffset,
+                                            struct page *page, int pageoffset,
                                              int len)
  {
         __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1);
  }
  
  static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
-                                              cfs_page_t *page, int pageoffset,
+                                              struct page *page, int pageoffset,
                                                int len)
  {
         __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
@@ -2808,6 +3096,8 @@ int liblustre_check_services(void *arg);
  void ptlrpc_daemonize(char *name);
  int ptlrpc_service_health_check(struct ptlrpc_service *);
  void ptlrpc_server_drop_request(struct ptlrpc_request *req);
+void ptlrpc_request_change_export(struct ptlrpc_request *req,
+                                 struct obd_export *export);
  
  #ifdef __KERNEL__
  int ptlrpc_hr_init(void);
@@ -2935,6 +3225,38 @@ lustre_shrink_reply(struct ptlrpc_request *req, int segment,
          req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
                                             newlen, move_data);
  }
+
+#ifdef LUSTRE_TRANSLATE_ERRNOS
+
+static inline int ptlrpc_status_hton(int h)
+{
+       /*
+        * Positive errnos must be network errnos, such as LUSTRE_EDEADLK,
+        * ELDLM_LOCK_ABORTED, etc.
+        */
+       if (h < 0)
+               return -lustre_errno_hton(-h);
+       else
+               return h;
+}
+
+static inline int ptlrpc_status_ntoh(int n)
+{
+       /*
+        * See the comment in ptlrpc_status_hton().
+        */
+       if (n < 0)
+               return -lustre_errno_ntoh(-n);
+       else
+               return n;
+}
+
+#else
+
+#define ptlrpc_status_hton(h) (h)
+#define ptlrpc_status_ntoh(n) (n)
+
+#endif
  /** @} */
  
  /** Change request phase of \a req to \a new_phase */