*
* Examples
*
- * #define MDT_NTHRS_INIT 2
- * #define MDT_NTHRS_BASE 64
- * #define MDT_NTHRS_FACTOR 8
- * #define MDT_NTHRS_MAX 1024
+ * #define MDS_NTHRS_INIT 2
+ * #define MDS_NTHRS_BASE 64
+ * #define MDS_NTHRS_FACTOR 8
+ * #define MDS_NTHRS_MAX 1024
*
* Example 1):
* ---------------------------------------------------------------------
* Server(A) has 16 cores, user configured it to 4 partitions so each
* partition has 4 cores, then actual number of service threads on each
* partition is:
- * MDT_NTHRS_BASE(64) + cores(4) * MDT_NTHRS_FACTOR(8) = 96
+ * MDS_NTHRS_BASE(64) + cores(4) * MDS_NTHRS_FACTOR(8) = 96
*
* Total number of threads for the service is:
* 96 * partitions(4) = 384
* Server(B) has 32 cores, user configured it to 4 partitions so each
* partition has 8 cores, then actual number of service threads on each
* partition is:
- * MDT_NTHRS_BASE(64) + cores(8) * MDT_NTHRS_FACTOR(8) = 128
+ * MDS_NTHRS_BASE(64) + cores(8) * MDS_NTHRS_FACTOR(8) = 128
*
* Total number of threads for the service is:
* 128 * partitions(4) = 512
* Server(B) has 96 cores, user configured it to 8 partitions so each
* partition has 12 cores, then actual number of service threads on each
* partition is:
- * MDT_NTHRS_BASE(64) + cores(12) * MDT_NTHRS_FACTOR(8) = 160
+ * MDS_NTHRS_BASE(64) + cores(12) * MDS_NTHRS_FACTOR(8) = 160
*
* Total number of threads for the service is:
* 160 * partitions(8) = 1280
*
- * However, it's above the soft limit MDT_NTHRS_MAX, so we choose this number
+ * However, it's above the soft limit MDS_NTHRS_MAX, so we choose this number
* as upper limit of threads number for each partition:
- * MDT_NTHRS_MAX(1024) / partitions(8) = 128
+ * MDS_NTHRS_MAX(1024) / partitions(8) = 128
*
* Example 4):
* ---------------------------------------------------------------------
 * Server(C) has a thousand cores and the user configured it to 32 partitions
- * MDT_NTHRS_BASE(64) * 32 = 2048
+ * MDS_NTHRS_BASE(64) * 32 = 2048
*
- * which is already above soft limit MDT_NTHRS_MAX(1024), but we still need
- * to guarantee that each partition has at least MDT_NTHRS_BASE(64) threads
+ * which is already above soft limit MDS_NTHRS_MAX(1024), but we still need
+ * to guarantee that each partition has at least MDS_NTHRS_BASE(64) threads
* to keep service healthy, so total number of threads will just be 2048.
*
* NB: we don't suggest to choose server with that many cores because backend
* Please see examples in "Thread Constants", MDS threads number will be at
* the comparable level of old versions, unless the server has many cores.
*/
-#ifndef MDT_MAX_THREADS
-#define MDT_MAX_THREADS 1024
-#define MDT_MAX_OTHR_THREADS 256
-
-#else /* MDT_MAX_THREADS */
-#if MDT_MAX_THREADS < PTLRPC_NTHRS_INIT
-#undef MDT_MAX_THREADS
-#define MDT_MAX_THREADS PTLRPC_NTHRS_INIT
+#ifndef MDS_MAX_THREADS
+#define MDS_MAX_THREADS 1024
+#define MDS_MAX_OTHR_THREADS 256
+
+#else /* MDS_MAX_THREADS */
+#if MDS_MAX_THREADS < PTLRPC_NTHRS_INIT
+#undef MDS_MAX_THREADS
+#define MDS_MAX_THREADS PTLRPC_NTHRS_INIT
#endif
-#define MDT_MAX_OTHR_THREADS max(PTLRPC_NTHRS_INIT, MDT_MAX_THREADS / 2)
+#define MDS_MAX_OTHR_THREADS max(PTLRPC_NTHRS_INIT, MDS_MAX_THREADS / 2)
#endif
/* default service */
-#define MDT_THR_FACTOR 8
-#define MDT_NTHRS_INIT PTLRPC_NTHRS_INIT
-#define MDT_NTHRS_MAX MDT_MAX_THREADS
-#define MDT_NTHRS_BASE min(64, MDT_NTHRS_MAX)
+#define MDS_THR_FACTOR 8
+#define MDS_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define MDS_NTHRS_MAX MDS_MAX_THREADS
+#define MDS_NTHRS_BASE min(64, MDS_NTHRS_MAX)
/* read-page service */
-#define MDT_RDPG_THR_FACTOR 4
-#define MDT_RDPG_NTHRS_INIT PTLRPC_NTHRS_INIT
-#define MDT_RDPG_NTHRS_MAX MDT_MAX_OTHR_THREADS
-#define MDT_RDPG_NTHRS_BASE min(48, MDT_RDPG_NTHRS_MAX)
+#define MDS_RDPG_THR_FACTOR 4
+#define MDS_RDPG_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define MDS_RDPG_NTHRS_MAX MDS_MAX_OTHR_THREADS
+#define MDS_RDPG_NTHRS_BASE min(48, MDS_RDPG_NTHRS_MAX)
/* these should be removed when we remove setattr service in the future */
-#define MDT_SETA_THR_FACTOR 4
-#define MDT_SETA_NTHRS_INIT PTLRPC_NTHRS_INIT
-#define MDT_SETA_NTHRS_MAX MDT_MAX_OTHR_THREADS
-#define MDT_SETA_NTHRS_BASE min(48, MDT_SETA_NTHRS_MAX)
+#define MDS_SETA_THR_FACTOR 4
+#define MDS_SETA_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define MDS_SETA_NTHRS_MAX MDS_MAX_OTHR_THREADS
+#define MDS_SETA_NTHRS_BASE min(48, MDS_SETA_NTHRS_MAX)
/* non-affinity threads */
-#define MDT_OTHR_NTHRS_INIT PTLRPC_NTHRS_INIT
-#define MDT_OTHR_NTHRS_MAX MDT_MAX_OTHR_THREADS
+#define MDS_OTHR_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define MDS_OTHR_NTHRS_MAX MDS_MAX_OTHR_THREADS
#define MDS_NBUFS (64 * cfs_num_online_cpus())
/**
* locked so that any old caller can communicate requests to
* the set holder who can then fold them into the lock-free set
*/
- cfs_spinlock_t set_new_req_lock;
+ spinlock_t set_new_req_lock;
/** List of new yet unsent requests. Only used with ptlrpcd now. */
cfs_list_t set_new_requests;
cfs_list_t rs_debug_list;
#endif
/** A spinlock to protect the reply state flags */
- cfs_spinlock_t rs_lock;
+ spinlock_t rs_lock;
/** Reply state flags */
unsigned long rs_difficult:1; /* ACK/commit stuff */
unsigned long rs_no_ack:1; /* no ACK, even for
* any allocations (to avoid e.g. OOM).
*/
struct ptlrpc_request_pool {
- /** Locks the list */
- cfs_spinlock_t prp_lock;
+ /** Locks the list */
+ spinlock_t prp_lock;
/** list of ptlrpc_request structs */
cfs_list_t prp_req_list;
/** Maximum message size that would fit into a rquest from this pool */
* in Lustre.
*/
struct ptlrpc_request {
- /* Request type: one of PTL_RPC_MSG_* */
- int rq_type;
+ /* Request type: one of PTL_RPC_MSG_* */
+ int rq_type;
+ /** Result of request processing */
+ int rq_status;
/**
* Linkage item through which this request is included into
* sending/delayed lists on client and into rqbd list on server
cfs_list_t rq_exp_list;
/** server-side hp handlers */
struct ptlrpc_hpreq_ops *rq_ops;
+
+ /** initial thread servicing this request */
+ struct ptlrpc_thread *rq_svc_thread;
+
/** history sequence # */
__u64 rq_history_seq;
/** the index of service's srv_at_array into which request is linked */
time_t rq_at_index;
- /** Result of request processing */
- int rq_status;
/** Lock to protect request flags and some other important bits, like
* rq_list
*/
- cfs_spinlock_t rq_lock;
- /** client-side flags are serialized by rq_lock */
- unsigned long rq_intr:1, rq_replied:1, rq_err:1,
+ spinlock_t rq_lock;
+ /** client-side flags are serialized by rq_lock */
+ unsigned int rq_intr:1, rq_replied:1, rq_err:1,
rq_timedout:1, rq_resend:1, rq_restart:1,
/**
* when ->rq_replay is set, request is kept by the client even
rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
rq_early:1, rq_must_unlink:1,
- rq_fake:1, /* this fake req */
rq_memalloc:1, /* req originated from "kswapd" */
/* server-side flags */
rq_packed_final:1, /* packed final reply */
rq_committed:1,
/* whether the "rq_set" is a valid one */
rq_invalid_rqset:1,
- rq_generation_set:1;
+ rq_generation_set:1,
+ /* do not resend request on -EINPROGRESS */
+ rq_no_retry_einprogress:1;
+
+ unsigned int rq_nr_resend;
enum rq_phase rq_phase; /* one of RQ_PHASE_* */
enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
cfs_atomic_t rq_refcount;/* client-side refcount for SENT race,
server-side refcounf for multiple replies */
- /** initial thread servicing this request */
- struct ptlrpc_thread *rq_svc_thread;
-
- /** Portal to which this request would be sent */
- int rq_request_portal; /* XXX FIXME bug 249 */
- /** Portal where to wait for reply and where reply would be sent */
- int rq_reply_portal; /* XXX FIXME bug 249 */
+ /** Portal to which this request would be sent */
+ short rq_request_portal; /* XXX FIXME bug 249 */
+ /** Portal where to wait for reply and where reply would be sent */
+ short rq_reply_portal; /* XXX FIXME bug 249 */
/**
* client-side:
int rq_nob_received;
/** Request length */
int rq_reqlen;
- /** Request message - what client sent */
- struct lustre_msg *rq_reqmsg;
-
/** Reply length */
int rq_replen;
+ /** Request message - what client sent */
+ struct lustre_msg *rq_reqmsg;
/** Reply message - server response */
struct lustre_msg *rq_repmsg;
/** Transaction number */
struct sptlrpc_flavor rq_flvr; /**< for client & server */
enum lustre_sec_part rq_sp_from;
- unsigned long /* client/server security flags */
+ /* client/server security flags */
+ unsigned int
rq_ctx_init:1, /* context initiation */
rq_ctx_fini:1, /* context destroy */
rq_bulk_read:1, /* request bulk read */
/* (server side), pointed directly into req buffer */
struct ptlrpc_user_desc *rq_user_desc;
- /** early replies go to offset 0, regular replies go after that */
- unsigned int rq_reply_off;
-
/* various buffer pointers */
struct lustre_msg *rq_reqbuf; /* req wrapper */
+ char *rq_repbuf; /* rep buffer */
+ struct lustre_msg *rq_repdata; /* rep wrapper msg */
+ struct lustre_msg *rq_clrbuf; /* only in priv mode */
int rq_reqbuf_len; /* req wrapper buf len */
int rq_reqdata_len; /* req wrapper msg len */
- char *rq_repbuf; /* rep buffer */
int rq_repbuf_len; /* rep buffer len */
- struct lustre_msg *rq_repdata; /* rep wrapper msg */
int rq_repdata_len; /* rep wrapper msg len */
- struct lustre_msg *rq_clrbuf; /* only in priv mode */
int rq_clrbuf_len; /* only in priv mode */
int rq_clrdata_len; /* only in priv mode */
+ /** early replies go to offset 0, regular replies go after that */
+ unsigned int rq_reply_off;
+
/** @} */
/** Fields that help to see if request and reply were swabbed or not */
int rq_timeout;
/** Multi-rpc bits */
- /** Link item for request set lists */
- cfs_list_t rq_set_chain;
/** Per-request waitq introduced by bug 21938 for recovery waiting */
cfs_waitq_t rq_set_waitq;
+ /** Link item for request set lists */
+ cfs_list_t rq_set_chain;
/** Link back to the request set */
struct ptlrpc_request_set *rq_set;
/** Async completion handler, called when reply is received */
/** client side */
unsigned long bd_registered:1;
/** For serialization with callback */
- cfs_spinlock_t bd_lock;
+ spinlock_t bd_lock;
/** Import generation when request for this bulk was sent */
int bd_import_generation;
/** Server side - export this bulk created for */
*/
struct ptlrpc_service {
/** serialize /proc operations */
- cfs_spinlock_t srv_lock;
+ spinlock_t srv_lock;
/** most often accessed fields */
/** chain thru all services */
cfs_list_t srv_list;
* rqbd list and incoming requests waiting for preprocess,
* threads starting & stopping are also protected by this lock.
*/
- cfs_spinlock_t scp_lock __cfs_cacheline_aligned;
+ spinlock_t scp_lock __cfs_cacheline_aligned;
/** total # req buffer descs allocated */
int scp_nrqbds_total;
/** # posted request buffers for receiving */
int scp_nrqbds_posted;
+ /** in progress of allocating rqbd */
+ int scp_rqbd_allocating;
/** # incoming reqs */
int scp_nreqs_incoming;
/** request buffers to be reposted */
* serialize the following fields, used for processing requests
* sent to this portal
*/
- cfs_spinlock_t scp_req_lock __cfs_cacheline_aligned;
+ spinlock_t scp_req_lock __cfs_cacheline_aligned;
/** # reqs in either of the queues below */
/** reqs waiting for service */
cfs_list_t scp_req_pending;
* serialize the following fields, used for changes on
* adaptive timeout
*/
- cfs_spinlock_t scp_at_lock __cfs_cacheline_aligned;
+ spinlock_t scp_at_lock __cfs_cacheline_aligned;
/** estimated rpc service time */
struct adaptive_timeout scp_at_estimate;
/** reqs waiting for replies */
* serialize the following fields, used for processing
* replies for this portal
*/
- cfs_spinlock_t scp_rep_lock __cfs_cacheline_aligned;
+ spinlock_t scp_rep_lock __cfs_cacheline_aligned;
/** all the active replies */
cfs_list_t scp_rep_active;
#ifndef __KERNEL__
* Declaration of ptlrpcd control structure
*/
struct ptlrpcd_ctl {
- /**
- * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE)
- */
- unsigned long pc_flags;
- /**
- * Thread lock protecting structure fields.
- */
- cfs_spinlock_t pc_lock;
- /**
- * Start completion.
- */
- cfs_completion_t pc_starting;
- /**
- * Stop completion.
- */
- cfs_completion_t pc_finishing;
+ /**
+ * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE)
+ */
+ unsigned long pc_flags;
+ /**
+ * Thread lock protecting structure fields.
+ */
+ spinlock_t pc_lock;
+ /**
+ * Start completion.
+ */
+ struct completion pc_starting;
+ /**
+ * Stop completion.
+ */
+ struct completion pc_finishing;
/**
* Thread requests set.
*/
static inline int ptlrpc_server_bulk_active(struct ptlrpc_bulk_desc *desc)
{
- int rc;
+ int rc;
- LASSERT(desc != NULL);
+ LASSERT(desc != NULL);
- cfs_spin_lock(&desc->bd_lock);
- rc = desc->bd_network_rw;
- cfs_spin_unlock(&desc->bd_lock);
- return rc;
+ spin_lock(&desc->bd_lock);
+ rc = desc->bd_network_rw;
+ spin_unlock(&desc->bd_lock);
+ return rc;
}
#endif
if (!desc)
return 0;
- cfs_spin_lock(&desc->bd_lock);
- rc = desc->bd_network_rw;
- cfs_spin_unlock(&desc->bd_lock);
- return rc;
+ spin_lock(&desc->bd_lock);
+ rc = desc->bd_network_rw;
+ spin_unlock(&desc->bd_lock);
+ return rc;
}
#define PTLRPC_REPLY_MAYBE_DIFFICULT 0x01
int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
__u32 version, int opcode, char **bufs,
struct ptlrpc_cli_ctx *ctx);
-struct ptlrpc_request *ptlrpc_prep_fakereq(struct obd_import *imp,
- unsigned int timeout,
- ptlrpc_interpterer_t interpreter);
-void ptlrpc_fakereq_finished(struct ptlrpc_request *req);
-
struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version,
int opcode, int count, __u32 *lengths,
char **bufs);
struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
int npages, int type, int portal);
-void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
-void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
- cfs_page_t *page, int pageoffset, int len);
+void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin);
+static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk)
+{
+ __ptlrpc_free_bulk(bulk, 1);
+}
+static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
+{
+ __ptlrpc_free_bulk(bulk, 0);
+}
+void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+ cfs_page_t *page, int pageoffset, int len, int);
+static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
+ cfs_page_t *page, int pageoffset,
+ int len)
+{
+ __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1);
+}
+
+static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
+ cfs_page_t *page, int pageoffset,
+ int len)
+{
+ __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
+}
+
void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
struct obd_import *imp);
__u64 ptlrpc_next_xid(void);
__u32 lustre_msg_get_service_time(struct lustre_msg *msg);
char *lustre_msg_get_jobid(struct lustre_msg *msg);
__u32 lustre_msg_get_cksum(struct lustre_msg *msg);
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 0, 0)
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
__u32 lustre_msg_calc_cksum(struct lustre_msg *msg, int compat18);
#else
# warning "remove checksum compatibility support for b1_8"
static inline int
ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
{
- int rc;
-
- cfs_spin_lock(&req->rq_lock);
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
- req->rq_reply_deadline > cfs_time_current_sec()) {
- cfs_spin_unlock(&req->rq_lock);
- return 1;
- }
- rc = req->rq_receiving_reply || req->rq_must_unlink;
- cfs_spin_unlock(&req->rq_lock);
- return rc;
+ int rc;
+
+ spin_lock(&req->rq_lock);
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ req->rq_reply_deadline > cfs_time_current_sec()) {
+ spin_unlock(&req->rq_lock);
+ return 1;
+ }
+ rc = req->rq_receiving_reply || req->rq_must_unlink;
+ spin_unlock(&req->rq_lock);
+ return rc;
}
static inline void
static inline int ptlrpc_no_resend(struct ptlrpc_request *req)
{
- if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) {
- cfs_spin_lock(&req->rq_lock);
- req->rq_no_resend = 1;
- cfs_spin_unlock(&req->rq_lock);
- }
- return req->rq_no_resend;
+ if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) {
+ spin_lock(&req->rq_lock);
+ req->rq_no_resend = 1;
+ spin_unlock(&req->rq_lock);
+ }
+ return req->rq_no_resend;
}
static inline int
/** @} */
/* ptlrpc/llog_server.c */
-int llog_origin_handle_create(struct ptlrpc_request *req);
+int llog_origin_handle_open(struct ptlrpc_request *req);
int llog_origin_handle_destroy(struct ptlrpc_request *req);
int llog_origin_handle_prev_block(struct ptlrpc_request *req);
int llog_origin_handle_next_block(struct ptlrpc_request *req);
int llog_origin_handle_read_header(struct ptlrpc_request *req);
int llog_origin_handle_close(struct ptlrpc_request *req);
int llog_origin_handle_cancel(struct ptlrpc_request *req);
-int llog_catinfo(struct ptlrpc_request *req);
/* ptlrpc/llog_client.c */
extern struct llog_operations llog_client_ops;