From: Qian Yingjin Date: Fri, 14 May 2021 14:53:52 +0000 (+0800) Subject: LU-14139 ptlrpc: grow PtlRPC properly when prepare sub request X-Git-Tag: 2.15.56~133 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=5a2dfd36f9c2b6c10ab7ba44b0e9e86372623fde;p=fs%2Flustre-release.git LU-14139 ptlrpc: grow PtlRPC properly when prepare sub request This patch prepares and grows the PtlRPC reply buffer properly for a SUB batch request in @req_capsule_server_pack(). It also caps the reply buffer size at BUT_MAXREPSIZE = (1000 * 1024). Signed-off-by: Qian Yingjin Change-Id: I4277974b3b0e9cd19fd0d18ae7c029cccaa9c558 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/43707 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Mikhail Pershin Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 42ee5bf..6b700eb 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -373,6 +373,9 @@ #define OUT_MAXREQSIZE (1000 * 1024) #define OUT_MAXREPSIZE MDS_MAXREPSIZE +#define BUT_MAXREQSIZE OUT_MAXREQSIZE +#define BUT_MAXREPSIZE BUT_MAXREQSIZE + /** MDS_BUFSIZE = max_reqsize (w/o LOV EA) + max sptlrpc payload size */ #define MDS_BUFSIZE max(MDS_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \ 8 * 1024) diff --git a/lustre/mdc/mdc_batch.c b/lustre/mdc/mdc_batch.c index d997e42..c2ebd17 100644 --- a/lustre/mdc/mdc_batch.c +++ b/lustre/mdc/mdc_batch.c @@ -138,7 +138,7 @@ static int mdc_batch_getattr_pack(struct batch_update_head *head, req_capsule_set_size(&pill, &RMF_ACL, RCL_SERVER, LUSTRE_POSIX_ACL_MAX_SIZE_OLD); req_capsule_set_size(&pill, &RMF_DEFAULT_MDT_MD, RCL_SERVER, - sizeof(struct lmv_user_md)); + /*sizeof(struct lmv_user_md)*/MIN_MD_SIZE); if (have_secctx) { char *secctx_name; diff --git a/lustre/mdt/mdt_batch.c b/lustre/mdt/mdt_batch.c index cfd6aa1..2dff768 100644 --- a/lustre/mdt/mdt_batch.c +++ b/lustre/mdt/mdt_batch.c @@ -154,6 +154,7 @@ int 
mdt_batch(struct tgt_session_info *tsi) __u32 update_buf_count; __u32 packed_replen; void **update_bufs; + bool grown = false; int buh_size; int rc; int i; @@ -326,7 +327,11 @@ next: * As @repmsg may be changed if the reply buffer is * too small to grow, thus it needs to reload it here. */ - repmsg = pill->rc_repmsg; + if (repmsg != pill->rc_repmsg) { + repmsg = pill->rc_repmsg; + grown = true; + } + repmsg->lm_result = rc; mdt_thread_info_reset(info); @@ -342,9 +347,17 @@ next: req_capsule_shrink(&req->rq_pill, &RMF_BUT_REPLY, packed_replen, RCL_SERVER); out: - if (reply != NULL) + if (reply != NULL) { + if (grown) { + reply = req_capsule_server_get(&req->rq_pill, + &RMF_BUT_REPLY); + if (reply == NULL) + GOTO(out_free, rc = -EPROTO); + } reply->burp_count = handled_update_count; + } +out_free: if (update_bufs != NULL) { if (bub != NULL) { for (i = 0; i < update_buf_count; i++, bub++) { diff --git a/lustre/ptlrpc/batch.c b/lustre/ptlrpc/batch.c index d678211..6381cc0 100644 --- a/lustre/ptlrpc/batch.c +++ b/lustre/ptlrpc/batch.c @@ -365,11 +365,16 @@ static int batch_update_request_fini(struct batch_update_head *head, */ repmsg = NULL; rc1 = -ECANCELED; + /* + * TODO: resend the unfinished sub request when the + * return code is -EOVERFLOW. 
+ */ } if (ouc->ouc_interpret != NULL) ouc->ouc_interpret(req, repmsg, ouc, rc1); + index++; object_update_callback_fini(ouc); if (rc == 0 && rc1 < 0) rc = rc1; diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c index d8057b8..4fb7b28 100644 --- a/lustre/ptlrpc/layout.c +++ b/lustre/ptlrpc/layout.c @@ -2023,17 +2023,59 @@ int req_capsule_server_pack(struct req_capsule *pill) count, fmt->rf_name); } } else { /* SUB request */ + struct ptlrpc_request *req = pill->rc_req; + __u32 used_len; __u32 msg_len; msg_len = lustre_msg_size_v2(count, pill->rc_area[RCL_SERVER]); - if (msg_len > pill->rc_reqmsg->lm_repsize) { + used_len = (char *)pill->rc_repmsg - (char *)req->rq_repmsg; + /* Overflow the reply buffer */ + if (used_len + msg_len > req->rq_replen) { + __u32 len; + __u32 max; + + if (!req_capsule_has_field(&req->rq_pill, + &RMF_BUT_REPLY, RCL_SERVER)) + return -EINVAL; + + if (!req_capsule_field_present(&req->rq_pill, + &RMF_BUT_REPLY, + RCL_SERVER)) + return -EINVAL; + + if (used_len + msg_len > BUT_MAXREPSIZE) + return -EOVERFLOW; + + len = req_capsule_get_size(&req->rq_pill, + &RMF_BUT_REPLY, RCL_SERVER); + /* + * Currently just increase the batch reply buffer + * by 2. 
+ */ + max = BUT_MAXREPSIZE - req->rq_replen; + if (used_len + msg_len > len) + len = used_len + msg_len; + + if (len > max) + len += max; + else + len += len; + rc = req_capsule_server_grow(&req->rq_pill, + &RMF_BUT_REPLY, len); + if (rc) + return rc; + + pill->rc_repmsg = + (struct lustre_msg *)((char *)req->rq_repmsg + + used_len); + } + if (msg_len > pill->rc_reqmsg->lm_repsize) /* TODO: Check whether there is enough buffer size */ CDEBUG(D_INFO, "Overflow pack %d fields in format '%s' for " "the SUB request with message len %u:%u\n", count, fmt->rf_name, msg_len, pill->rc_reqmsg->lm_repsize); - } rc = 0; lustre_init_msg_v2(pill->rc_repmsg, count, @@ -2684,7 +2726,7 @@ int req_capsule_server_grow(struct req_capsule *pill, struct ptlrpc_reply_state *rs = req->rq_reply_state, *nrs; char *from, *to, *sptr = NULL; __u32 slen = 0, snewlen = 0; - __u32 offset, len; + __u32 offset, len, max, diff; int rc; LASSERT(pill->rc_fmt != NULL); @@ -2718,13 +2760,23 @@ int req_capsule_server_grow(struct req_capsule *pill, } /* - * Currently just increase the reply buffer by 2 * newlen. + * Currently first try to increase the reply buffer by + * 2 * newlen with reply buffer limit of BUT_MAXREPSIZE. * TODO: Enlarge the reply buffer properly according to the * left SUB requests in the batch PTLRPC request. 
*/ snewlen = newlen; + diff = snewlen - slen; + max = BUT_MAXREPSIZE - req->rq_replen; + if (diff > max) + return -EOVERFLOW; + + if (diff * 2 + len < max) + newlen = (len + diff) * 2; + else + newlen = len + max; + req_capsule_set_size(pill, field, RCL_SERVER, snewlen); - newlen = len + cfs_size_round(2 * snewlen); req_capsule_set_size(&req->rq_pill, &RMF_BUT_REPLY, RCL_SERVER, newlen); offset = __req_capsule_offset(&req->rq_pill, &RMF_BUT_REPLY, diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 11ba80a..75627fe 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13771,6 +13771,30 @@ test_123d() { } run_test 123d "Statahead on striped directories works correctly" +test_123e() { + local max + local batch_max + local dir=$DIR/$tdir + + mkdir $dir || error "mkdir $dir failed" + $LFS setstripe -C 32 $dir || error "setstripe $dir failed" + + touch $dir/$tfile.{0..1000} || error "touch 1000 files failed" + + max=$($LCTL get_param -n llite.*.statahead_max | head -n 1) + batch_max=$($LCTL get_param -n llite.*.statahead_batch_max | head -n 1) + stack_trap "$LCTL set_param llite.*.statahead_max=$max" EXIT + stack_trap "$LCTL set_param llite.*.statahead_batch_max=$batch_max" EXIT + + $LCTL set_param llite.*.statahead_max=2048 + $LCTL set_param llite.*.statahead_batch_max=1024 + + ls -l $dir + $LCTL get_param mdc.*.batch_stats + $LCTL get_param llite.*.statahead_* +} +run_test 123e "statahead with large wide striping" + test_124a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" $LCTL get_param -n mdc.*.connect_flags | grep -q lru_resize ||