Whamcloud - gitweb
LU-12443 ptlrpc: fix reply buffers shrinking and growing 43/35243/7
authorMikhail Pershin <mpershin@whamcloud.com>
Mon, 17 Jun 2019 08:00:31 +0000 (11:00 +0300)
committerOleg Drokin <green@whamcloud.com>
Mon, 16 Sep 2019 23:01:05 +0000 (23:01 +0000)
The req_capsule_shrink() doesn't update capsule itself with
new buffer lenghts after the shrinking. Usually it is not
needed because reply is packed already. But if reply buffers
are re-allocated by req_capsule_server_grow() then non-updated
lenghts from capsule are used causing bigger reply message.
That may cause client buffer re-allocation with resend.

Patch does the following:
- update capsule lenght after the shrinking
- introduce lustre_grow_msg() to grow msg field in-place
- update req_capsule_server_grow() to use generic
  lustre_grow_msg() and make it able to grow reply without
  re-allocation if reply buffer is big enough already
- update sanity test 271f to use bigger file size to exceed
  current maximum reply buffer size allocated on client.

Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: I154c55d98f41406d0c932c7e8705e0ecf3dfa935
Reviewed-on: https://review.whamcloud.com/35243
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Tested-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_net.h
lustre/ptlrpc/layout.c
lustre/ptlrpc/pack_generic.c
lustre/tests/sanity.sh

index 6235fcb..2611c87 100644 (file)
@@ -2344,6 +2344,7 @@ int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens,
                             char **bufs, int flags);
 int lustre_shrink_msg(struct lustre_msg *msg, int segment,
                       unsigned int newlen, int move_data);
+int lustre_grow_msg(struct lustre_msg *msg, int segment, unsigned int newlen);
 void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
 int __lustre_unpack_msg(struct lustre_msg *m, int len);
 __u32 lustre_msg_hdr_size(__u32 magic, __u32 count);
index 9780452..1431632 100644 (file)
@@ -2498,17 +2498,22 @@ void req_capsule_shrink(struct req_capsule *pill,
 
         offset = __req_capsule_offset(pill, field, loc);
 
-        msg = __req_msg(pill, loc);
-        len = lustre_msg_buflen(msg, offset);
+       msg = __req_msg(pill, loc);
+       len = lustre_msg_buflen(msg, offset);
        LASSERTF(newlen <= len, "%s:%s, oldlen=%u, newlen=%u\n",
-                                fmt->rf_name, field->rmf_name, len, newlen);
-
-        if (loc == RCL_CLIENT)
-                pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen,
-                                                            1);
-        else
-                pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen,
-                                                            1);
+                fmt->rf_name, field->rmf_name, len, newlen);
+
+       if (loc == RCL_CLIENT) {
+               pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen,
+                                                           1);
+       } else {
+               pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen,
+                                                           1);
+               /* update also field size in reply lenghts arrays for possible
+                * reply re-pack due to req_capsule_server_grow() call.
+                */
+               req_capsule_set_size(pill, field, loc, newlen);
+       }
 }
 EXPORT_SYMBOL(req_capsule_shrink);
 
@@ -2516,46 +2521,52 @@ int req_capsule_server_grow(struct req_capsule *pill,
                            const struct req_msg_field *field,
                            __u32 newlen)
 {
-        struct ptlrpc_reply_state *rs = pill->rc_req->rq_reply_state, *nrs;
-        char *from, *to;
+       struct ptlrpc_reply_state *rs = pill->rc_req->rq_reply_state, *nrs;
+       char *from, *to;
        int rc;
        __u32 offset, len;
 
-        LASSERT(pill->rc_fmt != NULL);
-        LASSERT(__req_format_is_sane(pill->rc_fmt));
-        LASSERT(req_capsule_has_field(pill, field, RCL_SERVER));
-        LASSERT(req_capsule_field_present(pill, field, RCL_SERVER));
-
-        len = req_capsule_get_size(pill, field, RCL_SERVER);
-        offset = __req_capsule_offset(pill, field, RCL_SERVER);
-       if ((__u32)pill->rc_req->rq_repbuf_len >=
-            lustre_packed_msg_size(pill->rc_req->rq_repmsg) - len + newlen)
-                CERROR("Inplace repack might be done\n");
-
-        pill->rc_req->rq_reply_state = NULL;
-        req_capsule_set_size(pill, field, RCL_SERVER, newlen);
-        rc = req_capsule_server_pack(pill);
-        if (rc) {
-                /* put old rs back, the caller will decide what to do */
-                pill->rc_req->rq_reply_state = rs;
-                return rc;
-        }
-        nrs = pill->rc_req->rq_reply_state;
-        /* Now we need only buffers, copy first chunk */
-        to = lustre_msg_buf(nrs->rs_msg, 0, 0);
-        from = lustre_msg_buf(rs->rs_msg, 0, 0);
-        len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) - from;
-        memcpy(to, from, len);
-        /* check if we have tail and copy it too */
-        if (rs->rs_msg->lm_bufcount > offset + 1) {
-                to = lustre_msg_buf(nrs->rs_msg, offset + 1, 0);
-                from = lustre_msg_buf(rs->rs_msg, offset + 1, 0);
-                offset = rs->rs_msg->lm_bufcount - 1;
-                len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) +
-                      cfs_size_round(rs->rs_msg->lm_buflens[offset]) - from;
-                memcpy(to, from, len);
-        }
-        /* drop old reply if everything is fine */
+       LASSERT(pill->rc_fmt != NULL);
+       LASSERT(__req_format_is_sane(pill->rc_fmt));
+       LASSERT(req_capsule_has_field(pill, field, RCL_SERVER));
+       LASSERT(req_capsule_field_present(pill, field, RCL_SERVER));
+
+       len = req_capsule_get_size(pill, field, RCL_SERVER);
+       offset = __req_capsule_offset(pill, field, RCL_SERVER);
+
+       CDEBUG(D_INFO, "Reply packed: %d, allocated: %d, field len %d -> %d\n",
+              lustre_packed_msg_size(rs->rs_msg), rs->rs_repbuf_len,
+                                     len, newlen);
+
+       req_capsule_set_size(pill, field, RCL_SERVER, newlen);
+       /* there can be enough space in current reply buffer */
+       if (rs->rs_repbuf_len >=
+           lustre_packed_msg_size(rs->rs_msg) - len + newlen) {
+               pill->rc_req->rq_replen = lustre_grow_msg(rs->rs_msg, offset,
+                                                         newlen);
+               return 0;
+       }
+
+       /* Re-allocate replay state */
+       pill->rc_req->rq_reply_state = NULL;
+       rc = req_capsule_server_pack(pill);
+       if (rc) {
+               /* put old values back, the caller should decide what to do */
+               req_capsule_set_size(pill, field, RCL_SERVER, len);
+               pill->rc_req->rq_reply_state = rs;
+               return rc;
+       }
+       nrs = pill->rc_req->rq_reply_state;
+       LASSERT(lustre_packed_msg_size(nrs->rs_msg) >
+               lustre_packed_msg_size(rs->rs_msg));
+
+       /* Now we need only buffers, copy them and grow the needed one */
+       to = lustre_msg_buf(nrs->rs_msg, 0, 0);
+       from = lustre_msg_buf(rs->rs_msg, 0, 0);
+       len = (char *)rs->rs_msg + lustre_packed_msg_size(rs->rs_msg) - from;
+       memcpy(to, from, len);
+       pill->rc_req->rq_replen = lustre_grow_msg(nrs->rs_msg, offset, newlen);
+
         if (rs->rs_difficult) {
                 /* copy rs data */
                 int i;
index 3c4c3a0..8dd3c1f 100644 (file)
@@ -502,6 +502,57 @@ int lustre_shrink_msg(struct lustre_msg *msg, int segment,
 }
 EXPORT_SYMBOL(lustre_shrink_msg);
 
+static int lustre_grow_msg_v2(struct lustre_msg_v2 *msg, __u32 segment,
+                             unsigned int newlen)
+{
+       char *tail = NULL, *newpos;
+       int tail_len = 0, n;
+
+       LASSERT(msg);
+       LASSERT(msg->lm_bufcount > segment);
+       LASSERT(msg->lm_buflens[segment] <= newlen);
+
+       if (msg->lm_buflens[segment] == newlen)
+               goto out;
+
+       if (msg->lm_bufcount > segment + 1) {
+               tail = lustre_msg_buf_v2(msg, segment + 1, 0);
+               for (n = segment + 1; n < msg->lm_bufcount; n++)
+                       tail_len += cfs_size_round(msg->lm_buflens[n]);
+       }
+
+       msg->lm_buflens[segment] = newlen;
+
+       if (tail && tail_len) {
+               newpos = lustre_msg_buf_v2(msg, segment + 1, 0);
+               memmove(newpos, tail, tail_len);
+       }
+out:
+       return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+}
+
+/*
+ * for @msg, grow @segment to size @newlen.
+ * Always move higher buffer forward.
+ *
+ * return new msg size after growing.
+ *
+ * CAUTION:
+ * - caller must make sure there is enough space in allocated message buffer
+ * - caller should NOT keep pointers to msg buffers which higher than @segment
+ *   after call shrink.
+ */
+int lustre_grow_msg(struct lustre_msg *msg, int segment, unsigned int newlen)
+{
+       switch (msg->lm_magic) {
+       case LUSTRE_MSG_MAGIC_V2:
+               return lustre_grow_msg_v2(msg, segment, newlen);
+       default:
+               LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+       }
+}
+EXPORT_SYMBOL(lustre_grow_msg);
+
 void lustre_free_reply_state(struct ptlrpc_reply_state *rs)
 {
        PTLRPC_RS_DEBUG_LRU_DEL(rs);
index af2ddb3..2b02c2f 100644 (file)
@@ -18277,8 +18277,8 @@ test_271f() {
        local mdtidx=$($LFS getstripe --mdt-index $DIR/$tdir)
 
        cancel_lru_locks mdc
-       dd if=/dev/urandom of=$tmp bs=200000 count=1
-       dd if=$tmp of=$dom bs=200000 count=1
+       dd if=/dev/urandom of=$tmp bs=265000 count=1
+       dd if=$tmp of=$dom bs=265000 count=1
        cancel_lru_locks mdc
        cat /etc/hosts >> $tmp
        lctl set_param -n mdc.*.stats=clear
@@ -18305,6 +18305,7 @@ test_271f() {
        local ra=$(get_mdc_stats $mdtidx req_active)
        local rw=$(get_mdc_stats $mdtidx req_waittime)
 
+       [ -z $num ] && num=0
        [ $num -eq 1 ] || error "expect 1 READ RPC, $num occured"
        [ $ra == $rw ] || error "$((ra - rw)) resend occured"
        echo "... DONE"