* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2012, Intel Corporation.
+ * Copyright (c) 2010, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
*/
#define PTLRPC_MAX_BRW_BITS (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS)
#define PTLRPC_MAX_BRW_SIZE (1 << PTLRPC_MAX_BRW_BITS)
-#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> CFS_PAGE_SHIFT)
+#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
#define ONE_MB_BRW_SIZE (1 << LNET_MTU_BITS)
#define MD_MAX_BRW_SIZE (1 << LNET_MTU_BITS)
-#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> CFS_PAGE_SHIFT)
+#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
#define DT_MAX_BRW_SIZE PTLRPC_MAX_BRW_SIZE
-#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> CFS_PAGE_SHIFT)
+#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
#define OFD_MAX_BRW_SIZE (1 << LNET_MTU_BITS)
/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
# error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
# endif
-# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * CFS_PAGE_SIZE))
-# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * CFS_PAGE_SIZE"
+# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE))
+# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE"
# endif
# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT)
# error "PTLRPC_MAX_BRW_SIZE too big"
*/
/* depress threads factor for VM with small memory size */
#define OSS_THR_FACTOR min_t(int, 8, \
- CFS_NUM_CACHEPAGES >> (28 - CFS_PAGE_SHIFT))
+ NUM_CACHEPAGES >> (28 - PAGE_CACHE_SHIFT))
#define OSS_NTHRS_INIT (PTLRPC_NTHRS_INIT + 1)
#define OSS_NTHRS_BASE 64
#define OSS_NTHRS_MAX 512
};
/**
+ * ORR policy operations
+ */
+enum nrs_ctl_orr {
+ NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
+ NRS_CTL_ORR_WR_QUANTUM,
+ NRS_CTL_ORR_RD_OFF_TYPE,
+ NRS_CTL_ORR_WR_OFF_TYPE,
+ NRS_CTL_ORR_RD_SUPP_REQ,
+ NRS_CTL_ORR_WR_SUPP_REQ,
+};
+
+/**
* NRS policy operations.
*
* These determine the behaviour of a policy, and are called in response to
/** @} fifo */
/**
+ * \name CRR-N
+ *
+ * CRR-N, Client Round Robin over NIDs
+ * @{
+ */
+
+/**
+ * private data structure for CRR-N NRS
+ */
+struct nrs_crrn_net {
+ struct ptlrpc_nrs_resource cn_res;
+ cfs_binheap_t *cn_binheap;
+ cfs_hash_t *cn_cli_hash;
+ /**
+ * Used when a new scheduling round commences, in order to synchronize
+ * all clients with the new round number.
+ */
+ __u64 cn_round;
+ /**
+ * Determines the relevant ordering amongst request batches within a
+ * scheduling round.
+ */
+ __u64 cn_sequence;
+ /**
+ * Round Robin quantum; the maximum number of RPCs that each request
+ * batch for each client can have in a scheduling round.
+ */
+ __u16 cn_quantum;
+};
+
+/**
+ * Object representing a client in CRR-N, as identified by its NID
+ */
+struct nrs_crrn_client {
+ struct ptlrpc_nrs_resource cc_res;
+ cfs_hlist_node_t cc_hnode;
+ lnet_nid_t cc_nid;
+ /**
+ * The round number against which this client is currently scheduling
+ * requests.
+ */
+ __u64 cc_round;
+ /**
+ * The sequence number used for requests scheduled by this client during
+ * the current round number.
+ */
+ __u64 cc_sequence;
+ cfs_atomic_t cc_ref;
+ /**
+ * Round Robin quantum; the maximum number of RPCs the client is allowed
+ * to schedule in a single batch of each round.
+ */
+ __u16 cc_quantum;
+ /**
+ * # of pending requests for this client, on all existing rounds
+ */
+ __u16 cc_active;
+};
+
+/**
+ * CRR-N NRS request definition
+ */
+struct nrs_crrn_req {
+ /**
+ * Round number for this request; shared with all other requests in the
+ * same batch.
+ */
+ __u64 cr_round;
+ /**
+ * Sequence number for this request; shared with all other requests in
+ * the same batch.
+ */
+ __u64 cr_sequence;
+};
+
+/**
+ * CRR-N policy operations.
+ */
+enum nrs_ctl_crr {
+ /**
+ * Read the RR quantum size of a CRR-N policy.
+ */
+ NRS_CTL_CRRN_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
+ /**
+ * Write the RR quantum size of a CRR-N policy.
+ */
+ NRS_CTL_CRRN_WR_QUANTUM,
+};
+
+/** @} CRR-N */
+
+/**
+ * \name ORR/TRR
+ *
+ * ORR/TRR (Object-based Round Robin/Target-based Round Robin) NRS policies
+ * @{
+ */
+
+/**
+ * Lower and upper byte offsets of a brw RPC
+ */
+struct nrs_orr_req_range {
+ __u64 or_start;
+ __u64 or_end;
+};
+
+/**
+ * RPC types supported by the ORR/TRR policies
+ */
+enum nrs_orr_supp {
+ NOS_OST_READ = (1 << 0),
+ NOS_OST_WRITE = (1 << 1),
+ NOS_OST_RW = (NOS_OST_READ | NOS_OST_WRITE),
+ /**
+ * Default value for policies.
+ */
+ NOS_DFLT = NOS_OST_READ
+};
+
+/**
+ * As unique keys for grouping RPCs together, we use the object's OST FID for
+ * the ORR policy, and the OST index for the TRR policy.
+ *
+ * XXX: We waste some space for TRR policy instances by using a union, but it
+ * allows to consolidate some of the code between ORR and TRR, and these
+ * policies will probably eventually merge into one anyway.
+ */
+struct nrs_orr_key {
+ union {
+ /** object FID for ORR */
+ struct lu_fid ok_fid;
+ /** OST index for TRR */
+ __u32 ok_idx;
+ };
+};
+
+/**
+ * The largest base string for unique hash/slab object names is
+ * "nrs_orr_reg_", so 13 characters. We add 3 to this to be used for the CPT
+ * id number, so this _should_ be more than enough for the maximum number of
+ * CPTs on any system. If it does happen that this statement is incorrect,
+ * nrs_orr_genobjname() will inevitably yield a non-unique name and cause
+ * kmem_cache_create() to complain (on Linux), so the erroneous situation
+ * will hopefully not go unnoticed.
+ */
+#define NRS_ORR_OBJ_NAME_MAX (sizeof("nrs_orr_reg_") + 3)
+
+/**
+ * private data structure for ORR and TRR NRS
+ */
+struct nrs_orr_data {
+ struct ptlrpc_nrs_resource od_res;
+ cfs_binheap_t *od_binheap;
+ cfs_hash_t *od_obj_hash;
+ struct kmem_cache *od_cache;
+ /**
+ * Used when a new scheduling round commences, in order to synchronize
+ * all object or OST batches with the new round number.
+ */
+ __u64 od_round;
+ /**
+ * Determines the relevant ordering amongst request batches within a
+ * scheduling round.
+ */
+ __u64 od_sequence;
+ /**
+ * RPC types that are currently supported.
+ */
+ enum nrs_orr_supp od_supp;
+ /**
+ * Round Robin quantum; the maxium number of RPCs that each request
+ * batch for each object or OST can have in a scheduling round.
+ */
+ __u16 od_quantum;
+ /**
+ * Whether to use physical disk offsets or logical file offsets.
+ */
+ bool od_physical;
+ /**
+ * XXX: We need to provide a persistently allocated string to hold
+ * unique object names for this policy, since in currently supported
+ * versions of Linux by Lustre, kmem_cache_create() just sets a pointer
+ * to the name string provided. kstrdup() is used in the version of
+ * kmeme_cache_create() in current Linux mainline, so we may be able to
+ * remove this in the future.
+ */
+ char od_objname[NRS_ORR_OBJ_NAME_MAX];
+};
+
+/**
+ * Represents a backend-fs object or OST in the ORR and TRR policies
+ * respectively
+ */
+struct nrs_orr_object {
+ struct ptlrpc_nrs_resource oo_res;
+ cfs_hlist_node_t oo_hnode;
+ /**
+ * The round number against which requests are being scheduled for this
+ * object or OST
+ */
+ __u64 oo_round;
+ /**
+ * The sequence number used for requests scheduled for this object or
+ * OST during the current round number.
+ */
+ __u64 oo_sequence;
+ /**
+ * The key of the object or OST for which this structure instance is
+ * scheduling RPCs
+ */
+ struct nrs_orr_key oo_key;
+ cfs_atomic_t oo_ref;
+ /**
+ * Round Robin quantum; the maximum number of RPCs that are allowed to
+ * be scheduled for the object or OST in a single batch of each round.
+ */
+ __u16 oo_quantum;
+ /**
+ * # of pending requests for this object or OST, on all existing rounds
+ */
+ __u16 oo_active;
+};
+
+/**
+ * ORR/TRR NRS request definition
+ */
+struct nrs_orr_req {
+ /**
+ * The offset range this request covers
+ */
+ struct nrs_orr_req_range or_range;
+ /**
+ * Round number for this request; shared with all other requests in the
+ * same batch.
+ */
+ __u64 or_round;
+ /**
+ * Sequence number for this request; shared with all other requests in
+ * the same batch.
+ */
+ __u64 or_sequence;
+ /**
+ * For debugging purposes.
+ */
+ struct nrs_orr_key or_key;
+ /**
+ * An ORR policy instance has filled in request information while
+ * enqueueing the request on the service partition's regular NRS head.
+ */
+ unsigned int or_orr_set:1;
+ /**
+ * A TRR policy instance has filled in request information while
+ * enqueueing the request on the service partition's regular NRS head.
+ */
+ unsigned int or_trr_set:1;
+ /**
+ * Request offset ranges have been filled in with logical offset
+ * values.
+ */
+ unsigned int or_logical_set:1;
+ /**
+ * Request offset ranges have been filled in with physical offset
+ * values.
+ */
+ unsigned int or_physical_set:1;
+};
+
+/** @} ORR/TRR */
+
+/**
* NRS request
*
* Instances of this object exist embedded within ptlrpc_request; the main
* Fields for the FIFO policy
*/
struct nrs_fifo_req fifo;
+ /**
+ * CRR-N request defintion
+ */
+ struct nrs_crrn_req crr;
+ /** ORR and TRR share the same request definition */
+ struct nrs_orr_req orr;
} nr_u;
/**
* Externally-registering policies may want to use this to allocate
*/
struct ptlrpc_request_set *pc_set;
/**
- * Thread name used in cfs_daemonize()
+ * Thread name used in kthread_run()
*/
char pc_name[16];
/**
__ptlrpc_free_bulk(bulk, 0);
}
void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
- cfs_page_t *page, int pageoffset, int len, int);
+ struct page *page, int pageoffset, int len, int);
static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
- cfs_page_t *page, int pageoffset,
+ struct page *page, int pageoffset,
int len)
{
__ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1);
}
static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
- cfs_page_t *page, int pageoffset,
+ struct page *page, int pageoffset,
int len)
{
__ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
void ptlrpc_daemonize(char *name);
int ptlrpc_service_health_check(struct ptlrpc_service *);
void ptlrpc_server_drop_request(struct ptlrpc_request *req);
+void ptlrpc_request_change_export(struct ptlrpc_request *req,
+ struct obd_export *export);
#ifdef __KERNEL__
int ptlrpc_hr_init(void);
req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
newlen, move_data);
}
+
+#ifdef LUSTRE_TRANSLATE_ERRNOS
+
+static inline int ptlrpc_status_hton(int h)
+{
+ /*
+ * Positive errnos must be network errnos, such as LUSTRE_EDEADLK,
+ * ELDLM_LOCK_ABORTED, etc.
+ */
+ if (h < 0)
+ return -lustre_errno_hton(-h);
+ else
+ return h;
+}
+
+static inline int ptlrpc_status_ntoh(int n)
+{
+ /*
+ * See the comment in ptlrpc_status_hton().
+ */
+ if (n < 0)
+ return -lustre_errno_ntoh(-n);
+ else
+ return n;
+}
+
+#else
+
+#define ptlrpc_status_hton(h) (h)
+#define ptlrpc_status_ntoh(n) (n)
+
+#endif
/** @} */
/** Change request phase of \a req to \a new_phase */