From 608b4483d35d6f3fac2ac25a0bc24467022aea4d Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Fri, 19 Mar 2010 01:26:09 -0700 Subject: [PATCH] b=21486 fix for truncated reply buffer i=eeb i=ericm reply buffer could be referenced by reply_in_callback after being released --- lustre/include/lustre_net.h | 5 +++-- lustre/ptlrpc/client.c | 31 +++++++++++++++++++++++++------ lustre/ptlrpc/events.c | 2 +- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index c50eaad..e4f845c 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -339,8 +339,9 @@ struct ptlrpc_request { int rq_request_portal; /* XXX FIXME bug 249 */ int rq_reply_portal; /* XXX FIXME bug 249 */ - int rq_nob_received; /* client-side # reply bytes actually received */ - + int rq_nob_received; /* client-side: + * !rq_truncate : # reply bytes actually received, + * rq_truncate : required repbuf_len for resend */ int rq_reqlen; struct lustre_msg *rq_reqmsg; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 73f174a..7b02fb4 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1010,21 +1010,33 @@ static int after_reply(struct ptlrpc_request *req) long timediff; ENTRY; - LASSERT(!req->rq_receiving_reply); + /* repbuf must be unlinked */ + LASSERT(!req->rq_receiving_reply && !req->rq_must_unlink); LASSERT(obd); /* NB Until this point, the whole of the incoming message, * including buflens, status etc is in the sender's byte order. 
*/ - if (req->rq_reply_truncate && !req->rq_no_resend) { - req->rq_resend = 1; + if (req->rq_reply_truncate) { + if (req->rq_no_resend) { + DEBUG_REQ(D_ERROR, req, "reply buffer overflow," + " expected: %d, actual size: %d", + req->rq_nob_received, req->rq_replen); + RETURN(-EOVERFLOW); + } + OBD_FREE(req->rq_repbuf, req->rq_replen); req->rq_repbuf = NULL; - req->rq_replen = req->rq_nob_received; + /* Pass the required reply buffer size (include + * space for early reply) */ + req->rq_replen = size_round(req->rq_nob_received); + req->rq_nob_received = 0; + req->rq_resend = 1; RETURN(0); } - LASSERT (req->rq_nob_received <= req->rq_replen); + LASSERT ((char *)req->rq_repmsg + req->rq_nob_received <= + (char *)req->rq_repbuf + req->rq_replen); rc = unpack_reply(req); if (rc) RETURN(rc); @@ -1181,6 +1193,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); struct obd_import *imp = req->rq_import; + int unregistered = 0; int rc = 0; if (req->rq_phase == RQ_PHASE_NEW && @@ -1353,6 +1366,12 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) spin_unlock(&req->rq_lock); + /* unlink from net because we are going to + * swab in-place of reply buffer */ + unregistered = ptlrpc_unregister_reply(req, 1); + if (!unregistered) + continue; + req->rq_status = after_reply(req); if (req->rq_resend) continue; @@ -1386,7 +1405,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) /* This moves to "unregistering" phase we need to wait for * reply unlink. 
*/ - if (!ptlrpc_unregister_reply(req, 1)) + if (!unregistered && !ptlrpc_unregister_reply(req, 1)) continue; if (!ptlrpc_unregister_bulk(req, 1)) diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index e664265..e624950 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -96,7 +96,7 @@ void reply_in_callback(lnet_event_t *ev) LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK); LASSERT(ev->md.start == req->rq_repbuf); - LASSERT(ev->mlength <= req->rq_replen); + LASSERT(ev->offset + ev->mlength <= req->rq_replen); /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests for adaptive timeouts' early reply. */ LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0); -- 1.8.3.1