#define LDLM_NTHRS_MAX (cfs_num_online_cpus() == 1 ? 64 : 128)
#define LDLM_BL_THREADS LDLM_NTHRS_AUTO_INIT
-#define LDLM_NBUFS (64 * cfs_num_online_cpus())
+#define LDLM_NBUFS 64
#define LDLM_BUFSIZE (8 * 1024)
#define LDLM_MAXREQSIZE (5 * 1024)
#define LDLM_MAXREPSIZE (1024)
#define MDS_OTHR_NTHRS_INIT PTLRPC_NTHRS_INIT
#define MDS_OTHR_NTHRS_MAX MDS_MAX_OTHR_THREADS
-#define MDS_NBUFS (64 * cfs_num_online_cpus())
+#define MDS_NBUFS 64
/**
* Assume file name length = FNAME_MAX = 256 (true for ext3).
- * path name length = PATH_MAX = 4096
- * LOV MD size max = EA_MAX = 48000 (2000 stripes)
+ * path name length = PATH_MAX = 4096
+ * LOV MD size max = EA_MAX = 24 * 2000
+ * (NB: 24 is size of lov_ost_data)
+ * LOV LOGCOOKIE size max = 32 * 2000
+ * (NB: 32 is size of llog_cookie)
* symlink: FNAME_MAX + PATH_MAX <- largest
* link: FNAME_MAX + PATH_MAX (mds_rec_link < mds_rec_create)
* rename: FNAME_MAX + FNAME_MAX
* MDS_MAXREQSIZE ~= 4736 bytes =
* lustre_msg + ldlm_request + mdt_body + mds_rec_create + FNAME_MAX + PATH_MAX
* MDS_MAXREPSIZE ~= 8300 bytes = lustre_msg + llog_header
- * or, for mds_close() and mds_reint_unlink() on a many-OST filesystem:
- * = 9210 bytes = lustre_msg + mdt_body + 160 * (easize + cookiesize)
*
* Realistic size is about 512 bytes (20 character name + 128 char symlink),
* except in the open case where there are a large number of OSTs in a LOV.
*/
-#define MDS_MAXREPSIZE max(10 * 1024, 362 + LOV_MAX_STRIPE_COUNT * 56)
-#define MDS_MAXREQSIZE MDS_MAXREPSIZE
+#define MDS_MAXREQSIZE (5 * 1024) /* >= 4736 */
+#define MDS_MAXREPSIZE (9 * 1024) /* >= 8300 */
-/** MDS_BUFSIZE = max_reqsize + max sptlrpc payload size */
-#define MDS_BUFSIZE (MDS_MAXREQSIZE + 1024)
+/**
+ * MDS incoming request with LOV EA
+ * 24 = sizeof(struct lov_ost_data), i.e: replay of opencreate
+ */
+#define MDS_LOV_MAXREQSIZE max(MDS_MAXREQSIZE, \
+ 362 + LOV_MAX_STRIPE_COUNT * 24)
+/**
+ * MDS outgoing reply with LOV EA
+ *
+ * NB: max reply size Lustre 2.4+ client can get from old MDS is:
+ * LOV_MAX_STRIPE_COUNT * (llog_cookie + lov_ost_data) + extra bytes
+ *
+ * but 2.4 or later MDS will never send reply with llog_cookie to any
+ * version client. This macro is defined for server side reply buffer size.
+ */
+#define MDS_LOV_MAXREPSIZE MDS_LOV_MAXREQSIZE
+
+/**
+ * The update request includes all of updates from the create, which might
+ * include linkea (4K maxim), together with other updates, we set it to 9K:
+ * lustre_msg + ptlrpc_body + UPDATE_BUF_SIZE (8K)
+ */
+#define MDS_OUT_MAXREQSIZE (9 * 1024)
+#define MDS_OUT_MAXREPSIZE MDS_MAXREPSIZE
+
+/** MDS_BUFSIZE = max_reqsize (w/o LOV EA) + max sptlrpc payload size */
+#define MDS_BUFSIZE max_t(int, MDS_MAXREQSIZE + 1024, 8 * 1024)
+
+/**
+ * MDS_LOV_BUFSIZE should be at least max_reqsize (with LOV EA) +
+ * max sptlrpc payload size, however, we need to allocate a much larger buffer
+ * for it because LNet requires each MD(rqbd) has at least MDS_LOVE_MAXREQSIZE
+ * bytes left to avoid dropping of maximum-sized incoming request.
+ * So if MDS_LOV_BUFSIZE is only a little larger than MDS_LOV_MAXREQSIZE,
+ * then it can only fit in one request even there are 48K bytes left in
+ * a rqbd, and memory utilization is very low.
+ *
+ * In the meanwhile, size of rqbd can't be too large, because rqbd can't be
+ * reused until all requests fit in it have been processed and released,
+ * which means one long blocked request can prevent the rqbd be reused.
+ * Now we set request buffer size to 128K, so even each rqbd is unlinked
+ * from LNet with unused 48K, buffer utilization will be about 62%.
+ * Please check LU-2432 for details.
+ */
+/** MDS_LOV_BUFSIZE = max_reqsize (w/ LOV EA) + max sptlrpc payload size */
+#define MDS_LOV_BUFSIZE max_t(int, MDS_LOV_MAXREQSIZE + 1024, \
+ 128 * 1024)
+
+/**
+ * MDS_OUT_BUFSIZE = max_out_reqsize + max sptlrpc payload (~1K) which is
+ * about 10K, for the same reason as MDS_LOV_BUFSIZE, we also give some
+ * extra bytes to each request buffer to improve buffer utilization rate.
+ */
+#define MDS_OUT_BUFSIZE max_t(int, MDS_OUT_MAXREQSIZE + 1024, \
+ 24 * 1024)
/** FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc */
#define FLD_MAXREQSIZE (160)
/** FLD_MAXREPSIZE == lustre_msg + ptlrpc_body */
#define FLD_MAXREPSIZE (152)
+#define FLD_BUFSIZE (1 << 12)
/**
* SEQ_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + lu_range +
/** SEQ_MAXREPSIZE == lustre_msg + ptlrpc_body + lu_range */
#define SEQ_MAXREPSIZE (152)
+#define SEQ_BUFSIZE (1 << 12)
/** MGS threads must be >= 3, see bug 22458 comment #28 */
#define MGS_NTHRS_INIT (PTLRPC_NTHRS_INIT + 1)
#define MGS_NTHRS_MAX 32
-#define MGS_NBUFS (64 * cfs_num_online_cpus())
+#define MGS_NBUFS 64
#define MGS_BUFSIZE (8 * 1024)
#define MGS_MAXREQSIZE (7 * 1024)
#define MGS_MAXREPSIZE (9 * 1024)
#define OST_MAXREPSIZE (9 * 1024)
-#define OST_NBUFS (64 * cfs_num_online_cpus())
+#define OST_NBUFS 64
#define OST_BUFSIZE (OST_MAXREQSIZE + 1024)
/* Macro to hide a typecast. */
/** @} */
struct ptlrpc_service_buf_conf {
- /* nbufs is how many buffers to post */
+ /* nbufs is buffers # to allocate when growing the pool */
unsigned int bc_nbufs;
/* buffer size to post */
unsigned int bc_buf_size;
/* ptlrpc/ptlrpcd.c */
void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force);
+void ptlrpcd_free(struct ptlrpcd_ctl *pc);
void ptlrpcd_wake(struct ptlrpc_request *req);
void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx);
void ptlrpcd_add_rqset(struct ptlrpc_request_set *set);