From be025f5580a0cc4958267d2e4317aac4e2ebc0c3 Mon Sep 17 00:00:00 2001
From: Liang Zhen
Date: Tue, 1 Jan 2013 16:23:09 +0800
Subject: [PATCH] LU-2432 ptlrpc: alloc_rqbd spin on vmap_area_lock

vmalloc-based allocations can potentially take a very long time to
complete due to a regression in the kernel. As a result, MDS service
threads might lock up for certain periods of time while all of the
cores spin on the vmap_area_lock down in ptlrpc_alloc_rqbd.

This patch allows only one thread per CPT to enter the rqbd
allocation path.

Signed-off-by: Liang Zhen
Change-Id: I4ba442801859ae58cf8e8dd8ae18af1062379639
Reviewed-on: http://review.whamcloud.com/4939
Tested-by: Hudson
Tested-by: Maloo
Reviewed-by: Bobi Jam
Reviewed-by: Andreas Dilger
Reviewed-by: Prakash Surya
Reviewed-by: Oleg Drokin
---
 lustre/include/lustre_net.h |  2 ++
 lustre/ptlrpc/service.c     | 28 ++++++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index c02ecc2..fb49288 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -1426,6 +1426,8 @@ struct ptlrpc_service_part {
 	int			scp_nrqbds_total;
 	/** # posted request buffers for receiving */
 	int			scp_nrqbds_posted;
+	/** in progress of allocating rqbd */
+	int			scp_rqbd_allocating;
 	/** # incoming reqs */
 	int			scp_nreqs_incoming;
 	/** request buffers to be reposted */
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index cd94b25..ab19b5c 100644
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -125,9 +125,25 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
 	int				rc = 0;
 	int				i;
 
+	if (svcpt->scp_rqbd_allocating)
+		goto try_post;
+
+	spin_lock(&svcpt->scp_lock);
+	/* check again with lock */
+	if (svcpt->scp_rqbd_allocating) {
+		/* NB: we might allow more than one thread in the future */
+		LASSERT(svcpt->scp_rqbd_allocating == 1);
+		spin_unlock(&svcpt->scp_lock);
+		goto try_post;
+	}
+
+	svcpt->scp_rqbd_allocating++;
+	spin_unlock(&svcpt->scp_lock);
+
+
 	for (i = 0; i < svc->srv_nbuf_per_group; i++) {
-		/* NB: another thread might be doing this as well, we need to
-		 * make sure that it wouldn't over-allocate, see LU-1212. */
+		/* NB: another thread might have recycled enough rqbds, we
+		 * need to make sure it wouldn't over-allocate, see LU-1212. */
 		if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
 			break;
 
@@ -141,11 +157,19 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
 		}
 	}
 
+	spin_lock(&svcpt->scp_lock);
+
+	LASSERT(svcpt->scp_rqbd_allocating == 1);
+	svcpt->scp_rqbd_allocating--;
+
+	spin_unlock(&svcpt->scp_lock);
+
 	CDEBUG(D_RPCTRACE,
 	       "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
 	       svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted,
 	       svcpt->scp_nrqbds_total, rc);
 
+try_post:
 	if (post && rc == 0)
 		rc = ptlrpc_server_post_idle_rqbds(svcpt);
 
-- 
1.8.3.1
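
For context, below is a minimal userspace sketch of the double-check guard
this patch adds to ptlrpc_grow_req_bufs(). It is an illustration only, not
Lustre code: the names (struct part, grow_bufs, worker) are invented,
pthread_mutex_t stands in for the scp_lock spinlock, and usleep() stands in
for the slow vmalloc-backed allocation.

/*
 * Illustrative sketch only -- NOT Lustre code.  It mimics the guard the
 * patch adds: a flag is checked locklessly, then re-checked under a lock,
 * so at most one thread at a time runs the expensive allocation while the
 * others jump straight to the "post idle buffers" step.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct part {
	pthread_mutex_t lock;		/* stands in for scp_lock */
	int		allocating;	/* stands in for scp_rqbd_allocating */
	int		grown;		/* how often the slow path actually ran */
};

static void grow_bufs(struct part *p)
{
	/* cheap lockless check: someone else is already allocating */
	if (p->allocating)
		goto try_post;

	pthread_mutex_lock(&p->lock);
	/* check again with the lock held */
	if (p->allocating) {
		pthread_mutex_unlock(&p->lock);
		goto try_post;
	}
	p->allocating++;
	pthread_mutex_unlock(&p->lock);

	usleep(10000);	/* stands in for the slow vmalloc-backed allocation */
	p->grown++;	/* serialized: only one allocator at a time gets here */

	pthread_mutex_lock(&p->lock);
	p->allocating--;
	pthread_mutex_unlock(&p->lock);

try_post:
	/* every caller, allocator or not, would post idle buffers here */
	return;
}

static void *worker(void *arg)
{
	grow_bufs(arg);
	return NULL;
}

int main(void)
{
	struct part p = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
	pthread_t tid[8];
	int i;

	for (i = 0; i < 8; i++)
		pthread_create(&tid[i], NULL, worker, &p);
	for (i = 0; i < 8; i++)
		pthread_join(tid[i], NULL);

	/* with the guard, concurrent racers skip; typically prints 1 */
	printf("slow path ran %d time(s)\n", p.grown);
	return 0;
}

The design point is that the first, lockless check keeps the common case
cheap, while the second check under the lock closes the window where two
threads both saw the flag clear; the rest of the threads skip ahead instead
of piling up on vmap_area_lock inside the allocation path.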