Whamcloud - gitweb
LU-14139 ptlrpc: grow PtlRPC properly when prepare sub request 07/43707/14
author: Qian Yingjin <qian@ddn.com>
Fri, 14 May 2021 14:53:52 +0000 (22:53 +0800)
committer: Oleg Drokin <green@whamcloud.com>
Sat, 22 Apr 2023 17:32:42 +0000 (17:32 +0000)
This patch prepares and grows the PtlRPC reply buffer properly
for SUB batch requests in @req_capsule_server_pack().

It also caps the reply buffer size at
BUT_MAXREPSIZE = (1000 * 1024).

Signed-off-by: Qian Yingjin <qian@ddn.com>
Change-Id: I4277974b3b0e9cd19fd0d18ae7c029cccaa9c558
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/43707
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_net.h
lustre/mdc/mdc_batch.c
lustre/mdt/mdt_batch.c
lustre/ptlrpc/batch.c
lustre/ptlrpc/layout.c
lustre/tests/sanity.sh

index 42ee5bf..6b700eb 100644 (file)
 #define OUT_MAXREQSIZE (1000 * 1024)
 #define OUT_MAXREPSIZE MDS_MAXREPSIZE
 
+#define BUT_MAXREQSIZE OUT_MAXREQSIZE
+#define BUT_MAXREPSIZE BUT_MAXREQSIZE
+
 /** MDS_BUFSIZE = max_reqsize (w/o LOV EA) + max sptlrpc payload size */
 #define MDS_BUFSIZE            max(MDS_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \
                                    8 * 1024)
index d997e42..c2ebd17 100644 (file)
@@ -138,7 +138,7 @@ static int mdc_batch_getattr_pack(struct batch_update_head *head,
        req_capsule_set_size(&pill, &RMF_ACL, RCL_SERVER,
                             LUSTRE_POSIX_ACL_MAX_SIZE_OLD);
        req_capsule_set_size(&pill, &RMF_DEFAULT_MDT_MD, RCL_SERVER,
-                            sizeof(struct lmv_user_md));
+                            /*sizeof(struct lmv_user_md)*/MIN_MD_SIZE);
 
        if (have_secctx) {
                char *secctx_name;
index cfd6aa1..2dff768 100644 (file)
@@ -154,6 +154,7 @@ int mdt_batch(struct tgt_session_info *tsi)
        __u32 update_buf_count;
        __u32 packed_replen;
        void **update_bufs;
+       bool grown = false;
        int buh_size;
        int rc;
        int i;
@@ -326,7 +327,11 @@ next:
                         * As @repmsg may be changed if the reply buffer is
                         * too small to grow, thus it needs to reload it here.
                         */
-                       repmsg = pill->rc_repmsg;
+                       if (repmsg != pill->rc_repmsg) {
+                               repmsg = pill->rc_repmsg;
+                               grown = true;
+                       }
+
                        repmsg->lm_result = rc;
                        mdt_thread_info_reset(info);
 
@@ -342,9 +347,17 @@ next:
                req_capsule_shrink(&req->rq_pill, &RMF_BUT_REPLY,
                                   packed_replen, RCL_SERVER);
 out:
-       if (reply != NULL)
+       if (reply != NULL) {
+               if (grown) {
+                       reply = req_capsule_server_get(&req->rq_pill,
+                                                      &RMF_BUT_REPLY);
+                       if (reply == NULL)
+                               GOTO(out_free, rc = -EPROTO);
+               }
                reply->burp_count = handled_update_count;
+       }
 
+out_free:
        if (update_bufs != NULL) {
                if (bub != NULL) {
                        for (i = 0; i < update_buf_count; i++, bub++) {
index d678211..6381cc0 100644 (file)
@@ -365,11 +365,16 @@ static int batch_update_request_fini(struct batch_update_head *head,
                         */
                        repmsg = NULL;
                        rc1 = -ECANCELED;
+                       /*
+                        * TODO: resend the unfinished sub request when the
+                        * return code is -EOVERFLOW.
+                        */
                }
 
                if (ouc->ouc_interpret != NULL)
                        ouc->ouc_interpret(req, repmsg, ouc, rc1);
 
+               index++;
                object_update_callback_fini(ouc);
                if (rc == 0 && rc1 < 0)
                        rc = rc1;
index d8057b8..4fb7b28 100644 (file)
@@ -2023,17 +2023,59 @@ int req_capsule_server_pack(struct req_capsule *pill)
                                   count, fmt->rf_name);
                }
        } else { /* SUB request */
+               struct ptlrpc_request *req = pill->rc_req;
+               __u32 used_len;
                __u32 msg_len;
 
                msg_len = lustre_msg_size_v2(count, pill->rc_area[RCL_SERVER]);
-               if (msg_len > pill->rc_reqmsg->lm_repsize) {
+               used_len = (char *)pill->rc_repmsg - (char *)req->rq_repmsg;
+               /* Overflow the reply buffer */
+               if (used_len + msg_len > req->rq_replen) {
+                       __u32 len;
+                       __u32 max;
+
+                       if (!req_capsule_has_field(&req->rq_pill,
+                                                  &RMF_BUT_REPLY, RCL_SERVER))
+                               return -EINVAL;
+
+                       if (!req_capsule_field_present(&req->rq_pill,
+                                                      &RMF_BUT_REPLY,
+                                                      RCL_SERVER))
+                               return -EINVAL;
+
+                       if (used_len + msg_len > BUT_MAXREPSIZE)
+                               return -EOVERFLOW;
+
+                       len = req_capsule_get_size(&req->rq_pill,
+                                                  &RMF_BUT_REPLY, RCL_SERVER);
+                       /*
+                        * Currently just increase the batch reply buffer
+                        * by 2.
+                        */
+                       max = BUT_MAXREPSIZE - req->rq_replen;
+                       if (used_len + msg_len > len)
+                               len = used_len + msg_len;
+
+                       if (len > max)
+                               len += max;
+                       else
+                               len += len;
+                       rc = req_capsule_server_grow(&req->rq_pill,
+                                                    &RMF_BUT_REPLY, len);
+                       if (rc)
+                               return rc;
+
+                       pill->rc_repmsg =
+                               (struct lustre_msg *)((char *)req->rq_repmsg +
+                                                     used_len);
+               }
+               if (msg_len > pill->rc_reqmsg->lm_repsize)
                        /* TODO: Check whether there is enough buffer size */
                        CDEBUG(D_INFO,
                               "Overflow pack %d fields in format '%s' for "
                               "the SUB request with message len %u:%u\n",
                               count, fmt->rf_name, msg_len,
                               pill->rc_reqmsg->lm_repsize);
-               }
 
                rc = 0;
                lustre_init_msg_v2(pill->rc_repmsg, count,
@@ -2684,7 +2726,7 @@ int req_capsule_server_grow(struct req_capsule *pill,
        struct ptlrpc_reply_state *rs = req->rq_reply_state, *nrs;
        char *from, *to, *sptr = NULL;
        __u32 slen = 0, snewlen = 0;
-       __u32 offset, len;
+       __u32 offset, len, max, diff;
        int rc;
 
        LASSERT(pill->rc_fmt != NULL);
@@ -2718,13 +2760,23 @@ int req_capsule_server_grow(struct req_capsule *pill,
                }
 
                /*
-                * Currently just increase the reply buffer by 2 * newlen.
+                * Currently first try to increase the reply buffer by
+                * 2 * newlen with reply buffer limit of BUT_MAXREPSIZE.
                 * TODO: Enlarge the reply buffer properly according to the
                 * left SUB requests in the batch PTLRPC request.
                 */
                snewlen = newlen;
+               diff = snewlen - slen;
+               max = BUT_MAXREPSIZE - req->rq_replen;
+               if (diff > max)
+                       return -EOVERFLOW;
+
+               if (diff * 2 + len < max)
+                       newlen = (len + diff) * 2;
+               else
+                       newlen = len + max;
+
                req_capsule_set_size(pill, field, RCL_SERVER, snewlen);
-               newlen = len + cfs_size_round(2 * snewlen);
                req_capsule_set_size(&req->rq_pill, &RMF_BUT_REPLY, RCL_SERVER,
                                     newlen);
                offset = __req_capsule_offset(&req->rq_pill, &RMF_BUT_REPLY,
index 11ba80a..75627fe 100755 (executable)
@@ -13771,6 +13771,30 @@ test_123d() {
 }
 run_test 123d "Statahead on striped directories works correctly"
 
+test_123e() {
+       local max
+       local batch_max
+       local dir=$DIR/$tdir
+
+       mkdir $dir || error "mkdir $dir failed"
+       $LFS setstripe -C 32 $dir || error "setstripe $dir failed"
+
+       touch $dir/$tfile.{0..1000} || error "touch 1000 files failed"
+
+       max=$($LCTL get_param -n llite.*.statahead_max | head -n 1)
+       batch_max=$($LCTL get_param -n llite.*.statahead_batch_max | head -n 1)
+       stack_trap "$LCTL set_param llite.*.statahead_max=$max" EXIT
+       stack_trap "$LCTL set_param llite.*.statahead_batch_max=$batch_max" EXIT
+
+       $LCTL set_param llite.*.statahead_max=2048
+       $LCTL set_param llite.*.statahead_batch_max=1024
+
+       ls -l $dir
+       $LCTL get_param mdc.*.batch_stats
+       $LCTL get_param llite.*.statahead_*
+}
+run_test 123e "statahead with large wide striping"
+
 test_124a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
        $LCTL get_param -n mdc.*.connect_flags | grep -q lru_resize ||