From: Mikhail Pershin Date: Mon, 17 Jun 2019 08:00:31 +0000 (+0300) Subject: LU-12443 ptlrpc: fix reply buffers shrinking and growing X-Git-Tag: 2.12.90~175 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=cedbb25e984ceb85a66bc5a315fbfa05c5bcb423 LU-12443 ptlrpc: fix reply buffers shrinking and growing The req_capsule_shrink() doesn't update capsule itself with new buffer lenghts after the shrinking. Usually it is not needed because reply is packed already. But if reply buffers are re-allocated by req_capsule_server_grow() then non-updated lenghts from capsule are used causing bigger reply message. That may cause client buffer re-allocation with resend. Patch does the following: - update capsule lenght after the shrinking - introduce lustre_grow_msg() to grow msg field in-place - update req_capsule_server_grow() to use generic lustre_grow_msg() and make it able to grow reply without re-allocation if reply buffer is big enough already - update sanity test 271f to use bigger file size to exceed current maximum reply buffer size allocated on client. Signed-off-by: Mikhail Pershin Change-Id: I154c55d98f41406d0c932c7e8705e0ecf3dfa935 Reviewed-on: https://review.whamcloud.com/35243 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Sebastien Buisson Tested-by: Sebastien Buisson Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 6235fcb..2611c87 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -2344,6 +2344,7 @@ int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens, char **bufs, int flags); int lustre_shrink_msg(struct lustre_msg *msg, int segment, unsigned int newlen, int move_data); +int lustre_grow_msg(struct lustre_msg *msg, int segment, unsigned int newlen); void lustre_free_reply_state(struct ptlrpc_reply_state *rs); int __lustre_unpack_msg(struct lustre_msg *m, int len); __u32 lustre_msg_hdr_size(__u32 magic, __u32 count); diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c index 9780452..1431632 100644 --- a/lustre/ptlrpc/layout.c +++ b/lustre/ptlrpc/layout.c @@ -2498,17 +2498,22 @@ void req_capsule_shrink(struct req_capsule *pill, offset = __req_capsule_offset(pill, field, loc); - msg = __req_msg(pill, loc); - len = lustre_msg_buflen(msg, offset); + msg = __req_msg(pill, loc); + len = lustre_msg_buflen(msg, offset); LASSERTF(newlen <= len, "%s:%s, oldlen=%u, newlen=%u\n", - fmt->rf_name, field->rmf_name, len, newlen); - - if (loc == RCL_CLIENT) - pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen, - 1); - else - pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen, - 1); + fmt->rf_name, field->rmf_name, len, newlen); + + if (loc == RCL_CLIENT) { + pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen, + 1); + } else { + pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen, + 1); + /* update also field size in reply lenghts arrays for possible + * reply re-pack due to req_capsule_server_grow() call. + */ + req_capsule_set_size(pill, field, loc, newlen); + } } EXPORT_SYMBOL(req_capsule_shrink); @@ -2516,46 +2521,52 @@ int req_capsule_server_grow(struct req_capsule *pill, const struct req_msg_field *field, __u32 newlen) { - struct ptlrpc_reply_state *rs = pill->rc_req->rq_reply_state, *nrs; - char *from, *to; + struct ptlrpc_reply_state *rs = pill->rc_req->rq_reply_state, *nrs; + char *from, *to; int rc; __u32 offset, len; - LASSERT(pill->rc_fmt != NULL); - LASSERT(__req_format_is_sane(pill->rc_fmt)); - LASSERT(req_capsule_has_field(pill, field, RCL_SERVER)); - LASSERT(req_capsule_field_present(pill, field, RCL_SERVER)); - - len = req_capsule_get_size(pill, field, RCL_SERVER); - offset = __req_capsule_offset(pill, field, RCL_SERVER); - if ((__u32)pill->rc_req->rq_repbuf_len >= - lustre_packed_msg_size(pill->rc_req->rq_repmsg) - len + newlen) - CERROR("Inplace repack might be done\n"); - - pill->rc_req->rq_reply_state = NULL; - req_capsule_set_size(pill, field, RCL_SERVER, newlen); - rc = req_capsule_server_pack(pill); - if (rc) { - /* put old rs back, the caller will decide what to do */ - pill->rc_req->rq_reply_state = rs; - return rc; - } - nrs = pill->rc_req->rq_reply_state; - /* Now we need only buffers, copy first chunk */ - to = lustre_msg_buf(nrs->rs_msg, 0, 0); - from = lustre_msg_buf(rs->rs_msg, 0, 0); - len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) - from; - memcpy(to, from, len); - /* check if we have tail and copy it too */ - if (rs->rs_msg->lm_bufcount > offset + 1) { - to = lustre_msg_buf(nrs->rs_msg, offset + 1, 0); - from = lustre_msg_buf(rs->rs_msg, offset + 1, 0); - offset = rs->rs_msg->lm_bufcount - 1; - len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) + - cfs_size_round(rs->rs_msg->lm_buflens[offset]) - from; - memcpy(to, from, len); - } - /* drop old reply if everything is fine */ + LASSERT(pill->rc_fmt != NULL); + LASSERT(__req_format_is_sane(pill->rc_fmt)); + LASSERT(req_capsule_has_field(pill, field, RCL_SERVER)); + LASSERT(req_capsule_field_present(pill, field, RCL_SERVER)); + + len = req_capsule_get_size(pill, field, RCL_SERVER); + offset = __req_capsule_offset(pill, field, RCL_SERVER); + + CDEBUG(D_INFO, "Reply packed: %d, allocated: %d, field len %d -> %d\n", + lustre_packed_msg_size(rs->rs_msg), rs->rs_repbuf_len, + len, newlen); + + req_capsule_set_size(pill, field, RCL_SERVER, newlen); + /* there can be enough space in current reply buffer */ + if (rs->rs_repbuf_len >= + lustre_packed_msg_size(rs->rs_msg) - len + newlen) { + pill->rc_req->rq_replen = lustre_grow_msg(rs->rs_msg, offset, + newlen); + return 0; + } + + /* Re-allocate replay state */ + pill->rc_req->rq_reply_state = NULL; + rc = req_capsule_server_pack(pill); + if (rc) { + /* put old values back, the caller should decide what to do */ + req_capsule_set_size(pill, field, RCL_SERVER, len); + pill->rc_req->rq_reply_state = rs; + return rc; + } + nrs = pill->rc_req->rq_reply_state; + LASSERT(lustre_packed_msg_size(nrs->rs_msg) > + lustre_packed_msg_size(rs->rs_msg)); + + /* Now we need only buffers, copy them and grow the needed one */ + to = lustre_msg_buf(nrs->rs_msg, 0, 0); + from = lustre_msg_buf(rs->rs_msg, 0, 0); + len = (char *)rs->rs_msg + lustre_packed_msg_size(rs->rs_msg) - from; + memcpy(to, from, len); + pill->rc_req->rq_replen = lustre_grow_msg(nrs->rs_msg, offset, newlen); + if (rs->rs_difficult) { /* copy rs data */ int i; diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 3c4c3a0..8dd3c1f 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -502,6 +502,57 @@ int lustre_shrink_msg(struct lustre_msg *msg, int segment, } EXPORT_SYMBOL(lustre_shrink_msg); +static int lustre_grow_msg_v2(struct lustre_msg_v2 *msg, __u32 segment, + unsigned int newlen) +{ + char *tail = NULL, *newpos; + int tail_len = 0, n; + + LASSERT(msg); + LASSERT(msg->lm_bufcount > segment); + LASSERT(msg->lm_buflens[segment] <= newlen); + + if (msg->lm_buflens[segment] == newlen) + goto out; + + if (msg->lm_bufcount > segment + 1) { + tail = lustre_msg_buf_v2(msg, segment + 1, 0); + for (n = segment + 1; n < msg->lm_bufcount; n++) + tail_len += cfs_size_round(msg->lm_buflens[n]); + } + + msg->lm_buflens[segment] = newlen; + + if (tail && tail_len) { + newpos = lustre_msg_buf_v2(msg, segment + 1, 0); + memmove(newpos, tail, tail_len); + } +out: + return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); +} + +/* + * for @msg, grow @segment to size @newlen. + * Always move higher buffer forward. + * + * return new msg size after growing. + * + * CAUTION: + * - caller must make sure there is enough space in allocated message buffer + * - caller should NOT keep pointers to msg buffers which higher than @segment + * after call shrink. + */ +int lustre_grow_msg(struct lustre_msg *msg, int segment, unsigned int newlen) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_grow_msg_v2(msg, segment, newlen); + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_grow_msg); + void lustre_free_reply_state(struct ptlrpc_reply_state *rs) { PTLRPC_RS_DEBUG_LRU_DEL(rs); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index af2ddb3..2b02c2f 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -18277,8 +18277,8 @@ test_271f() { local mdtidx=$($LFS getstripe --mdt-index $DIR/$tdir) cancel_lru_locks mdc - dd if=/dev/urandom of=$tmp bs=200000 count=1 - dd if=$tmp of=$dom bs=200000 count=1 + dd if=/dev/urandom of=$tmp bs=265000 count=1 + dd if=$tmp of=$dom bs=265000 count=1 cancel_lru_locks mdc cat /etc/hosts >> $tmp lctl set_param -n mdc.*.stats=clear @@ -18305,6 +18305,7 @@ test_271f() { local ra=$(get_mdc_stats $mdtidx req_active) local rw=$(get_mdc_stats $mdtidx req_waittime) + [ -z $num ] && num=0 [ $num -eq 1 ] || error "expect 1 READ RPC, $num occured" [ $ra == $rw ] || error "$((ra - rw)) resend occured" echo "... DONE"