From be025f5580a0cc4958267d2e4317aac4e2ebc0c3 Mon Sep 17 00:00:00 2001
From: Liang Zhen
Date: Tue, 1 Jan 2013 16:23:09 +0800
Subject: [PATCH] LU-2432 ptlrpc: alloc_rqbd spin on vmap_area_lock

vmalloc-based allocations can potentially take a very long time to
complete due to a regression in the kernel. As a result, MDS service
threads might lock up for certain periods of time while all of the
cores spin on the vmap_area_lock down in ptlrpc_alloc_rqbd.

This patch allows only one thread per CPT to enter the rqbd
allocation path.

Signed-off-by: Liang Zhen
Change-Id: I4ba442801859ae58cf8e8dd8ae18af1062379639
Reviewed-on: http://review.whamcloud.com/4939
Tested-by: Hudson
Tested-by: Maloo
Reviewed-by: Bobi Jam
Reviewed-by: Andreas Dilger
Reviewed-by: Prakash Surya
Reviewed-by: Oleg Drokin
---
 lustre/include/lustre_net.h |  2 ++
 lustre/ptlrpc/service.c     | 28 ++++++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index c02ecc2..fb49288 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -1426,6 +1426,8 @@ struct ptlrpc_service_part {
 	int			scp_nrqbds_total;
 	/** # posted request buffers for receiving */
 	int			scp_nrqbds_posted;
+	/** in progress of allocating rqbd */
+	int			scp_rqbd_allocating;
 	/** # incoming reqs */
 	int			scp_nreqs_incoming;
 	/** request buffers to be reposted */
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index cd94b25..ab19b5c 100644
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -125,9 +125,25 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
 	int				rc = 0;
 	int				i;
 
+	if (svcpt->scp_rqbd_allocating)
+		goto try_post;
+
+	spin_lock(&svcpt->scp_lock);
+	/* check again with lock */
+	if (svcpt->scp_rqbd_allocating) {
+		/* NB: we might allow more than one thread in the future */
+		LASSERT(svcpt->scp_rqbd_allocating == 1);
+		spin_unlock(&svcpt->scp_lock);
+		goto try_post;
+	}
+
+	svcpt->scp_rqbd_allocating++;
+	spin_unlock(&svcpt->scp_lock);
+
+
 	for (i = 0; i < svc->srv_nbuf_per_group; i++) {
-		/* NB: another thread might be doing this as well, we need to
-		 * make sure that it wouldn't over-allocate, see LU-1212. */
+		/* NB: another thread might have recycled enough rqbds, we
+		 * need to make sure it wouldn't over-allocate, see LU-1212. */
 		if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
 			break;
 
@@ -141,11 +157,19 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
 		}
 	}
 
+	spin_lock(&svcpt->scp_lock);
+
+	LASSERT(svcpt->scp_rqbd_allocating == 1);
+	svcpt->scp_rqbd_allocating--;
+
+	spin_unlock(&svcpt->scp_lock);
+
 	CDEBUG(D_RPCTRACE,
 	       "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
 	       svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted,
 	       svcpt->scp_nrqbds_total, rc);
 
+try_post:
 	if (post && rc == 0)
 		rc = ptlrpc_server_post_idle_rqbds(svcpt);
 
-- 
1.8.3.1
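
For context, below is a minimal userspace sketch of the double-check guard
this patch adds to ptlrpc_grow_req_bufs(). It is an illustration only, not
Lustre code: the names (struct part, grow_bufs, worker) are invented,
pthread_mutex_t stands in for the scp_lock spinlock, and usleep() stands in
for the slow vmalloc-backed allocation.

/*
 * Illustrative sketch only -- NOT Lustre code.  It mimics the guard the
 * patch adds: a flag is checked locklessly, then re-checked under a lock,
 * so at most one thread at a time runs the expensive allocation while the
 * others jump straight to the "post idle buffers" step.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct part {
	pthread_mutex_t lock;		/* stands in for scp_lock */
	int		allocating;	/* stands in for scp_rqbd_allocating */
	int		grown;		/* how often the slow path actually ran */
};

static void grow_bufs(struct part *p)
{
	/* cheap lockless check: someone else is already allocating */
	if (p->allocating)
		goto try_post;

	pthread_mutex_lock(&p->lock);
	/* check again with the lock held */
	if (p->allocating) {
		pthread_mutex_unlock(&p->lock);
		goto try_post;
	}
	p->allocating++;
	pthread_mutex_unlock(&p->lock);

	usleep(10000);	/* stands in for the slow vmalloc-backed allocation */
	p->grown++;	/* serialized: only one allocator at a time gets here */

	pthread_mutex_lock(&p->lock);
	p->allocating--;
	pthread_mutex_unlock(&p->lock);

try_post:
	/* every caller, allocator or not, would post idle buffers here */
	return;
}

static void *worker(void *arg)
{
	grow_bufs(arg);
	return NULL;
}

int main(void)
{
	struct part p = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
	pthread_t tid[8];
	int i;

	for (i = 0; i < 8; i++)
		pthread_create(&tid[i], NULL, worker, &p);
	for (i = 0; i < 8; i++)
		pthread_join(tid[i], NULL);

	/* with the guard, concurrent racers skip; typically prints 1 */
	printf("slow path ran %d time(s)\n", p.grown);
	return 0;
}

The design point is that the first, lockless check keeps the common case
cheap, while the second check under the lock closes the window where two
threads both saw the flag clear; the rest of the threads skip ahead instead
of piling up on vmap_area_lock inside the allocation path.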