LU-14139 ptlrpc: grow PtlRPC properly when prepare sub request

author Qian Yingjin <qian@ddn.com>

Fri, 14 May 2021 14:53:52 +0000 (22:53 +0800)

committer Oleg Drokin <green@whamcloud.com>

Sat, 22 Apr 2023 17:32:42 +0000 (17:32 +0000)
author Qian Yingjin <qian@ddn.com>
Fri, 14 May 2021 14:53:52 +0000 (22:53 +0800)
committer Oleg Drokin <green@whamcloud.com>
Sat, 22 Apr 2023 17:32:42 +0000 (17:32 +0000)
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h

index 42ee5bf..6b700eb 100644 (file)
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -373,6 +373,9 @@
  #define OUT_MAXREQSIZE (1000 * 1024)
  #define OUT_MAXREPSIZE MDS_MAXREPSIZE
  
+#define BUT_MAXREQSIZE OUT_MAXREQSIZE
+#define BUT_MAXREPSIZE BUT_MAXREQSIZE
+
  /** MDS_BUFSIZE = max_reqsize (w/o LOV EA) + max sptlrpc payload size */
  #define MDS_BUFSIZE            max(MDS_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \
                                     8 * 1024)
diff --git a/lustre/mdc/mdc_batch.c b/lustre/mdc/mdc_batch.c

index d997e42..c2ebd17 100644 (file)
--- a/lustre/mdc/mdc_batch.c
+++ b/lustre/mdc/mdc_batch.c
@@ -138,7 +138,7 @@ static int mdc_batch_getattr_pack(struct batch_update_head *head,
         req_capsule_set_size(&pill, &RMF_ACL, RCL_SERVER,
                              LUSTRE_POSIX_ACL_MAX_SIZE_OLD);
         req_capsule_set_size(&pill, &RMF_DEFAULT_MDT_MD, RCL_SERVER,
-                            sizeof(struct lmv_user_md));
+                            /*sizeof(struct lmv_user_md)*/MIN_MD_SIZE);
  
         if (have_secctx) {
                 char *secctx_name;
diff --git a/lustre/mdt/mdt_batch.c b/lustre/mdt/mdt_batch.c

index cfd6aa1..2dff768 100644 (file)
--- a/lustre/mdt/mdt_batch.c
+++ b/lustre/mdt/mdt_batch.c
@@ -154,6 +154,7 @@ int mdt_batch(struct tgt_session_info *tsi)
         __u32 update_buf_count;
         __u32 packed_replen;
         void **update_bufs;
+       bool grown = false;
         int buh_size;
         int rc;
         int i;
@@ -326,7 +327,11 @@ next:
                          * As @repmsg may be changed if the reply buffer is
                          * too small to grow, thus it needs to reload it here.
                          */
-                       repmsg = pill->rc_repmsg;
+                       if (repmsg != pill->rc_repmsg) {
+                               repmsg = pill->rc_repmsg;
+                               grown = true;
+                       }
+
                         repmsg->lm_result = rc;
                         mdt_thread_info_reset(info);
  
@@ -342,9 +347,17 @@ next:
                 req_capsule_shrink(&req->rq_pill, &RMF_BUT_REPLY,
                                    packed_replen, RCL_SERVER);
  out:
-       if (reply != NULL)
+       if (reply != NULL) {
+               if (grown) {
+                       reply = req_capsule_server_get(&req->rq_pill,
+                                                      &RMF_BUT_REPLY);
+                       if (reply == NULL)
+                               GOTO(out_free, rc = -EPROTO);
+               }
                 reply->burp_count = handled_update_count;
+       }
  
+out_free:
         if (update_bufs != NULL) {
                 if (bub != NULL) {
                         for (i = 0; i < update_buf_count; i++, bub++) {
diff --git a/lustre/ptlrpc/batch.c b/lustre/ptlrpc/batch.c

index d678211..6381cc0 100644 (file)
--- a/lustre/ptlrpc/batch.c
+++ b/lustre/ptlrpc/batch.c
@@ -365,11 +365,16 @@ static int batch_update_request_fini(struct batch_update_head *head,
                          */
                         repmsg = NULL;
                         rc1 = -ECANCELED;
+                       /*
+                        * TODO: resend the unfinished sub request when the
+                        * return code is -EOVERFLOW.
+                        */
                 }
  
                 if (ouc->ouc_interpret != NULL)
                         ouc->ouc_interpret(req, repmsg, ouc, rc1);
  
+               index++;
                 object_update_callback_fini(ouc);
                 if (rc == 0 && rc1 < 0)
                         rc = rc1;
diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c

index d8057b8..4fb7b28 100644 (file)
--- a/lustre/ptlrpc/layout.c
+++ b/lustre/ptlrpc/layout.c
@@ -2023,17 +2023,59 @@ int req_capsule_server_pack(struct req_capsule *pill)
                                    count, fmt->rf_name);
                 }
         } else { /* SUB request */
+               struct ptlrpc_request *req = pill->rc_req;
+               __u32 used_len;
                 __u32 msg_len;
  
                 msg_len = lustre_msg_size_v2(count, pill->rc_area[RCL_SERVER]);
-               if (msg_len > pill->rc_reqmsg->lm_repsize) {
+               used_len = (char *)pill->rc_repmsg - (char *)req->rq_repmsg;
+               /* This SUB reply does not fit in the current reply buffer */
+               if (used_len + msg_len > req->rq_replen) {
+                       __u32 len;
+                       __u32 max;
+
+                       if (!req_capsule_has_field(&req->rq_pill,
+                                                  &RMF_BUT_REPLY, RCL_SERVER))
+                               return -EINVAL;
+
+                       if (!req_capsule_field_present(&req->rq_pill,
+                                                      &RMF_BUT_REPLY,
+                                                      RCL_SERVER))
+                               return -EINVAL;
+
+                       if (used_len + msg_len > BUT_MAXREPSIZE)
+                               return -EOVERFLOW;
+
+                       len = req_capsule_get_size(&req->rq_pill,
+                                                  &RMF_BUT_REPLY, RCL_SERVER);
+                       /*
+                        * Double the batch reply buffer, or add only the
+                        * headroom left below BUT_MAXREPSIZE if smaller.
+                        */
+                       max = BUT_MAXREPSIZE - req->rq_replen;
+                       if (used_len + msg_len > len)
+                               len = used_len + msg_len;
+
+                       if (len > max)
+                               len += max;
+                       else
+                               len += len;
+                       rc = req_capsule_server_grow(&req->rq_pill,
+                                                    &RMF_BUT_REPLY, len);
+                       if (rc)
+                               return rc;
+
+                       pill->rc_repmsg =
+                               (struct lustre_msg *)((char *)req->rq_repmsg +
+                                                     used_len);
+               }
+               if (msg_len > pill->rc_reqmsg->lm_repsize)
                         /* TODO: Check whether there is enough buffer size */
                         CDEBUG(D_INFO,
                                "Overflow pack %d fields in format '%s' for "
                                "the SUB request with message len %u:%u\n",
                                count, fmt->rf_name, msg_len,
                                pill->rc_reqmsg->lm_repsize);
-               }
  
                 rc = 0;
                 lustre_init_msg_v2(pill->rc_repmsg, count,
@@ -2684,7 +2726,7 @@ int req_capsule_server_grow(struct req_capsule *pill,
         struct ptlrpc_reply_state *rs = req->rq_reply_state, *nrs;
         char *from, *to, *sptr = NULL;
         __u32 slen = 0, snewlen = 0;
-       __u32 offset, len;
+       __u32 offset, len, max, diff;
         int rc;
  
         LASSERT(pill->rc_fmt != NULL);
@@ -2718,13 +2760,23 @@ int req_capsule_server_grow(struct req_capsule *pill,
                 }
  
                 /*
-                * Currently just increase the reply buffer by 2 * newlen.
+                * Try to double the grown reply buffer first; if that would
+                * not fit, only add the room left below BUT_MAXREPSIZE.
                  * TODO: Enlarge the reply buffer properly according to the
                  * left SUB requests in the batch PTLRPC request.
                  */
                 snewlen = newlen;
+               diff = snewlen - slen;
+               max = BUT_MAXREPSIZE - req->rq_replen;
+               if (diff > max)
+                       return -EOVERFLOW;
+
+               if (diff * 2 + len < max)
+                       newlen = (len + diff) * 2;
+               else
+                       newlen = len + max;
+
                 req_capsule_set_size(pill, field, RCL_SERVER, snewlen);
-               newlen = len + cfs_size_round(2 * snewlen);
                 req_capsule_set_size(&req->rq_pill, &RMF_BUT_REPLY, RCL_SERVER,
                                      newlen);
                 offset = __req_capsule_offset(&req->rq_pill, &RMF_BUT_REPLY,
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 11ba80a..75627fe 100755 (executable)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -13771,6 +13771,30 @@ test_123d() {
  }
  run_test 123d "Statahead on striped directories works correctly"
  
+test_123e() {
+       local max
+       local batch_max
+       local dir=$DIR/$tdir
+
+       mkdir $dir || error "mkdir $dir failed"
+       $LFS setstripe -C 32 $dir || error "setstripe $dir failed"
+
+       touch $dir/$tfile.{0..1000} || error "touch 1000 files failed"
+
+       max=$($LCTL get_param -n llite.*.statahead_max | head -n 1)
+       batch_max=$($LCTL get_param -n llite.*.statahead_batch_max | head -n 1)
+       stack_trap "$LCTL set_param llite.*.statahead_max=$max" EXIT
+       stack_trap "$LCTL set_param llite.*.statahead_batch_max=$batch_max" EXIT
+
+       $LCTL set_param llite.*.statahead_max=2048
+       $LCTL set_param llite.*.statahead_batch_max=1024
+
+       ls -l $dir
+       $LCTL get_param mdc.*.batch_stats
+       $LCTL get_param llite.*.statahead_*
+}
+run_test 123e "statahead with large wide striping"
+
  test_124a() {
         [ $PARALLEL == "yes" ] && skip "skip parallel run"
         $LCTL get_param -n mdc.*.connect_flags | grep -q lru_resize ||
author	Qian Yingjin <qian@ddn.com>
	Fri, 14 May 2021 14:53:52 +0000 (22:53 +0800)
committer	Oleg Drokin <green@whamcloud.com>
	Sat, 22 Apr 2023 17:32:42 +0000 (17:32 +0000)
lustre/include/lustre_net.h		patch \| blob \| history
lustre/mdc/mdc_batch.c		patch \| blob \| history
lustre/mdt/mdt_batch.c		patch \| blob \| history
lustre/ptlrpc/batch.c		patch \| blob \| history
lustre/ptlrpc/layout.c		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history