From 783de3e99d0a2185d26bc67a9ca852b08a760cf6 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 13 Mar 2013 23:11:34 +0800 Subject: [PATCH] LU-2951 mdt: Increase bc_req_max_size for MDS_REQUEST_PORTAL Large EA tests triggered error messages like this on MDSs: 10:38:21:LNetError: 3022:0:(lib-ptl.c:190:lnet_try_match_md()) Matching packet from 12345-10.10.17.9@tcp, match 1429230968490588 length 65928 too big: 117674 left, 49386 allowed These were the REINT_SETXATTR requests carrying large EA values. They were dropped because the MDSs did not expect request buffers larger than 49386 bytes. This patch increases bc_req_max_size (and bc_rep_max_size) for MDS_REQUEST_PORTAL (and MDC_REPLY_PORTAL) to accommodate REINT_SETXATTR requests (and MDS_GETXATTR replies) with 255-byte names and 65536-byte values (or 65536-byte lists of EA names). Change-Id: Ifdcda6d3e91aa6115e5cafd5abd6c89b15485020 Signed-off-by: Li Wei Reviewed-on: http://review.whamcloud.com/5703 Tested-by: Hudson Reviewed-by: Liang Zhen Reviewed-by: Nathaniel Clark Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre_net.h | 53 +++++++++++++++++++++++++++++++-------------- lustre/mdt/mdt_mds.c | 6 ++--- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index cd6225a..0c8a806 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -315,6 +315,7 @@ #define MDS_OTHR_NTHRS_MAX MDS_MAX_OTHR_THREADS #define MDS_NBUFS 64 + /** * Assume file name length = FNAME_MAX = 256 (true for ext3). 
path name length = PATH_MAX = 4096 @@ -355,6 +356,26 @@ #define MDS_LOV_MAXREPSIZE MDS_LOV_MAXREQSIZE /** + * This is the size of a maximum REINT_SETXATTR request: + * + * lustre_msg 56 (32 + 4 x 5 + 4) + * ptlrpc_body 184 + * mdt_rec_setxattr 136 + * lustre_capa 120 + * name 256 (XATTR_NAME_MAX) + * value 65536 (XATTR_SIZE_MAX) + */ +#define MDS_EA_MAXREQSIZE 66288 + +/** + * These are the maximum request and reply sizes (rounded up to 1 KB + * boundaries) for the "regular" MDS_REQUEST_PORTAL and MDC_REPLY_PORTAL. + */ +#define MDS_REG_MAXREQSIZE (((max(MDS_EA_MAXREQSIZE, \ + MDS_LOV_MAXREQSIZE) + 1023) >> 10) << 10) +#define MDS_REG_MAXREPSIZE MDS_REG_MAXREQSIZE + +/** * The update request includes all of updates from the create, which might * include linkea (4K maxim), together with other updates, we set it to 9K: * lustre_msg + ptlrpc_body + UPDATE_BUF_SIZE (8K) @@ -363,35 +384,35 @@ #define MDS_OUT_MAXREPSIZE MDS_MAXREPSIZE /** MDS_BUFSIZE = max_reqsize (w/o LOV EA) + max sptlrpc payload size */ -#define MDS_BUFSIZE max_t(int, MDS_MAXREQSIZE + 1024, 8 * 1024) +#define MDS_BUFSIZE max(MDS_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \ + 8 * 1024) /** - * MDS_LOV_BUFSIZE should be at least max_reqsize (with LOV EA) + - * max sptlrpc payload size, however, we need to allocate a much larger buffer - * for it because LNet requires each MD(rqbd) has at least MDS_LOVE_MAXREQSIZE - * bytes left to avoid dropping of maximum-sized incoming request. - * So if MDS_LOV_BUFSIZE is only a little larger than MDS_LOV_MAXREQSIZE, - * then it can only fit in one request even there are 48K bytes left in - * a rqbd, and memory utilization is very low. + * MDS_REG_BUFSIZE should at least be MDS_REG_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD. + * However, we need to allocate a much larger buffer for it because LNet + * requires each MD(rqbd) has at least MDS_REG_MAXREQSIZE bytes left to avoid + * dropping of maximum-sized incoming request. 
So if MDS_REG_BUFSIZE is only a + * little larger than MDS_REG_MAXREQSIZE, then it can only fit in one request + * even if there are about MDS_REG_MAXREQSIZE bytes left in a rqbd, and memory + * utilization is very low. * * In the meanwhile, size of rqbd can't be too large, because rqbd can't be * reused until all requests fit in it have been processed and released, * which means one long blocked request can prevent the rqbd be reused. - * Now we set request buffer size to 128K, so even each rqbd is unlinked - * from LNet with unused 48K, buffer utilization will be about 62%. + * Now we set request buffer size to 160 KB, so even if each rqbd is unlinked + * from LNet with unused 65 KB, buffer utilization will be about 59%. * Please check LU-2432 for details. */ -/** MDS_LOV_BUFSIZE = max_reqsize (w/ LOV EA) + max sptlrpc payload size */ -#define MDS_LOV_BUFSIZE max_t(int, MDS_LOV_MAXREQSIZE + 1024, \ - 128 * 1024) +#define MDS_REG_BUFSIZE max(MDS_REG_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \ + 160 * 1024) /** * MDS_OUT_BUFSIZE = max_out_reqsize + max sptlrpc payload (~1K) which is - * about 10K, for the same reason as MDS_LOV_BUFSIZE, we also give some + * about 10K, for the same reason as MDS_REG_BUFSIZE, we also give some * extra bytes to each request buffer to improve buffer utilization rate. 
*/ -#define MDS_OUT_BUFSIZE max_t(int, MDS_OUT_MAXREQSIZE + 1024, \ - 24 * 1024) +#define MDS_OUT_BUFSIZE max(MDS_OUT_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \ + 24 * 1024) /** FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc */ #define FLD_MAXREQSIZE (160) diff --git a/lustre/mdt/mdt_mds.c b/lustre/mdt/mdt_mds.c index 08f4f11..4ec8a51 100644 --- a/lustre/mdt/mdt_mds.c +++ b/lustre/mdt/mdt_mds.c @@ -429,9 +429,9 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR, .psc_buf = { .bc_nbufs = MDS_NBUFS, - .bc_buf_size = MDS_LOV_BUFSIZE, - .bc_req_max_size = MDS_LOV_MAXREQSIZE, - .bc_rep_max_size = MDS_LOV_MAXREPSIZE, + .bc_buf_size = MDS_REG_BUFSIZE, + .bc_req_max_size = MDS_REG_MAXREQSIZE, + .bc_rep_max_size = MDS_REG_MAXREPSIZE, .bc_req_portal = MDS_REQUEST_PORTAL, .bc_rep_portal = MDC_REPLY_PORTAL, }, -- 1.8.3.1