From 6182af3703026ac633b6f0bddc3e90958dc9631d Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Fri, 23 Jan 2015 15:27:22 -0800 Subject: [PATCH] LU-6122 lnet: Allocate the correct number of rtr buffers This patch ensures that the correct number of router buffers is allocated. It keeps one count of the number of buffers actually allocated (rbp_nbuffers) and a second count of the number of buffers requested (rbp_req_nbuffers). The number of buffers allocated is set when creating new buffers and reduced when buffers are freed. The number of requested buffers is set when the buffers are allocated and is checked when credits are returned to determine whether the buffer should be freed or kept. In lnet_rtrpool_adjust_bufs() grab lnet_net_lock() before using rbp_nbuffers to ensure that it is not changed by lnet_return_rx_credits_locked() during the process of allocating new buffers. All other accesses to rbp_nbuffers are already protected by lnet_net_lock(). This avoids the case where we allocate fewer than the desired number of buffers. 
Signed-off-by: Amir Shehata Change-Id: I96627cc8ba3d3d70a0bf581b21ccd3c9b2de327f Reviewed-on: http://review.whamcloud.com/13519 Tested-by: Jenkins Reviewed-by: Jinshan Xiong Tested-by: Maloo Reviewed-by: Doug Oucharek Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-types.h | 4 +++- lnet/lnet/lib-move.c | 6 ++++-- lnet/lnet/router.c | 38 ++++++++++++++++++++++++++------------ 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 4701b38..5a63cd5 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -431,7 +431,9 @@ typedef struct { struct list_head rbp_msgs; /* # pages in each buffer */ int rbp_npages; - /* # buffers */ + /* requested number of buffers */ + int rbp_req_nbuffers; + /* # buffers actually allocated */ int rbp_nbuffers; /* # free buffers / blocked messages */ int rbp_credits; diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index e53370f..6d3a31e 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -1095,9 +1095,11 @@ lnet_return_rx_credits_locked(lnet_msg_t *msg) /* It is possible that a user has lowered the desired number of * buffers in this pool. Make sure we never put back * more buffers than the stated number. */ - if (rbp->rbp_credits >= rbp->rbp_nbuffers) { - /* Discard this buffer so we don't have too many. */ + if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) { + /* Discard this buffer so we don't have too + * many. 
*/ lnet_destroy_rtrbuf(rb, rbp->rbp_npages); + rbp->rbp_nbuffers--; } else { list_add(&rb->rb_list, &rbp->rbp_bufs); rbp->rbp_credits++; diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 6699245..e7eb4c9 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -1361,6 +1361,7 @@ lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp, int cpt) lnet_net_lock(cpt); lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt); list_splice_init(&rbp->rbp_bufs, &tmp); + rbp->rbp_req_nbuffers = 0; rbp->rbp_nbuffers = rbp->rbp_credits = 0; rbp->rbp_mincredits = 0; lnet_net_unlock(cpt); @@ -1380,37 +1381,50 @@ lnet_rtrpool_adjust_bufs(lnet_rtrbufpool_t *rbp, int nbufs, int cpt) lnet_rtrbuf_t *rb; int num_rb; int num_buffers = 0; + int old_req_nbufs; int npages = rbp->rbp_npages; + lnet_net_lock(cpt); /* If we are called for less buffers than already in the pool, we - * just lower the nbuffers number and excess buffers will be + * just lower the req_nbuffers number and excess buffers will be * thrown away as they are returned to the free list. Credits - * then get adjusted as well. */ - if (nbufs <= rbp->rbp_nbuffers) { - lnet_net_lock(cpt); - rbp->rbp_nbuffers = nbufs; + * then get adjusted as well. + * If we already have enough buffers allocated to serve the + * increase requested, then we can treat that the same way as we + * do the decrease. */ + num_rb = nbufs - rbp->rbp_nbuffers; + if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) { + rbp->rbp_req_nbuffers = nbufs; lnet_net_unlock(cpt); return 0; } + /* store the older value of rbp_req_nbuffers and then set it to + * the new request to prevent lnet_return_rx_credits_locked() from + * freeing buffers that we need to keep around */ + old_req_nbufs = rbp->rbp_req_nbuffers; + rbp->rbp_req_nbuffers = nbufs; + lnet_net_unlock(cpt); INIT_LIST_HEAD(&rb_list); /* allocate the buffers on a local list first. If all buffers are * allocated successfully then join this list to the rbp buffer * list. If not then free all allocated buffers. 
*/ - num_rb = rbp->rbp_nbuffers; - - while (num_rb < nbufs) { + while (num_rb-- > 0) { rb = lnet_new_rtrbuf(rbp, cpt); if (rb == NULL) { CERROR("Failed to allocate %d route bufs of %d pages\n", nbufs, npages); + + lnet_net_lock(cpt); + rbp->rbp_req_nbuffers = old_req_nbufs; + lnet_net_unlock(cpt); + goto failed; } list_add(&rb->rb_list, &rb_list); num_buffers++; - num_rb++; } lnet_net_lock(cpt); @@ -1445,9 +1459,9 @@ lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages) INIT_LIST_HEAD(&rbp->rbp_msgs); INIT_LIST_HEAD(&rbp->rbp_bufs); - rbp->rbp_npages = npages; - rbp->rbp_credits = 0; - rbp->rbp_mincredits = 0; + rbp->rbp_npages = npages; + rbp->rbp_credits = 0; + rbp->rbp_mincredits = 0; } void -- 1.8.3.1