LU-2432 ptlrpc: alloc_rqbd spin on vmap_area_lock
author Liang Zhen <liang@whamcloud.com>
Tue, 1 Jan 2013 08:23:09 +0000 (16:23 +0800)
committer Oleg Drokin <green@whamcloud.com>
Tue, 8 Jan 2013 05:03:59 +0000 (00:03 -0500)
vmalloc-based allocations can take a very long time to complete due to
a regression in the kernel. As a result, MDS service threads can lock
up for long stretches while all of the cores spin on the vmap_area_lock
down in ptlrpc_alloc_rqbd.

This patch allows only one thread per CPT to enter the rqbd
allocation path.
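
The change amounts to a double-checked flag that lets a single thread
per service partition grow the buffer pool, while the (potentially slow)
allocations themselves run outside the spinlock. A minimal user-space
sketch of that pattern follows; it uses pthreads and hypothetical names
(svc_part, grow_req_bufs, alloc_one_buffer) rather than the real ptlrpc
structures, so it illustrates the idea only, not the actual code:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified stand-in for ptlrpc_service_part: a count of posted
     * buffers plus a flag saying "someone is already allocating". */
    struct svc_part {
            pthread_spinlock_t lock;
            int                nrqbds_posted;
            int                nbuf_per_group;
            bool               rqbd_allocating;
    };

    /* Hypothetical helper standing in for rqbd allocation/posting. */
    static int alloc_one_buffer(struct svc_part *svcpt)
    {
            svcpt->nrqbds_posted++;
            return 0;
    }

    static void grow_req_bufs(struct svc_part *svcpt)
    {
            int i;

            /* Cheap unlocked check: if another thread is already in
             * the allocation path, do not pile up behind it. */
            if (svcpt->rqbd_allocating)
                    return;

            pthread_spin_lock(&svcpt->lock);
            /* Re-check under the lock to close the race window. */
            if (svcpt->rqbd_allocating) {
                    pthread_spin_unlock(&svcpt->lock);
                    return;
            }
            svcpt->rqbd_allocating = true;
            pthread_spin_unlock(&svcpt->lock);

            /* Slow allocations run outside the lock; only the one
             * thread that set the flag gets here. */
            for (i = 0; i < svcpt->nbuf_per_group; i++) {
                    if (svcpt->nrqbds_posted >= svcpt->nbuf_per_group)
                            break;
                    if (alloc_one_buffer(svcpt) != 0)
                            break;
            }

            pthread_spin_lock(&svcpt->lock);
            svcpt->rqbd_allocating = false;
            pthread_spin_unlock(&svcpt->lock);
    }

    int main(void)
    {
            struct svc_part svcpt = { .nbuf_per_group = 64 };

            pthread_spin_init(&svcpt.lock, PTHREAD_PROCESS_PRIVATE);
            grow_req_bufs(&svcpt);
            printf("posted %d buffers\n", svcpt.nrqbds_posted);
            pthread_spin_destroy(&svcpt.lock);
            return 0;
    }

The sketch returns early when another allocator is active; in the real
patch (see the service.c hunk below) those threads instead jump to
try_post so they can still repost idle rqbds while the single allocator
does the vmalloc-heavy work.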

Signed-off-by: Liang Zhen <liang@whamcloud.com>
Change-Id: I4ba442801859ae58cf8e8dd8ae18af1062379639
Reviewed-on: http://review.whamcloud.com/4939
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Bobi Jam <bobijam@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Prakash Surya <surya1@llnl.gov>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_net.h
lustre/ptlrpc/service.c

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index c02ecc2..fb49288 100644
@@ -1426,6 +1426,8 @@ struct ptlrpc_service_part {
        int                             scp_nrqbds_total;
        /** # posted request buffers for receiving */
        int                             scp_nrqbds_posted;
+       /** in progress of allocating rqbd */
+       int                             scp_rqbd_allocating;
        /** # incoming reqs */
        int                             scp_nreqs_incoming;
        /** request buffers to be reposted */
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index cd94b25..ab19b5c 100644
@@ -125,9 +125,25 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
         int                                rc = 0;
         int                                i;
 
+       if (svcpt->scp_rqbd_allocating)
+               goto try_post;
+
+       spin_lock(&svcpt->scp_lock);
+       /* check again with lock */
+       if (svcpt->scp_rqbd_allocating) {
+               /* NB: we might allow more than one thread in the future */
+               LASSERT(svcpt->scp_rqbd_allocating == 1);
+               spin_unlock(&svcpt->scp_lock);
+               goto try_post;
+       }
+
+       svcpt->scp_rqbd_allocating++;
+       spin_unlock(&svcpt->scp_lock);
+
+
         for (i = 0; i < svc->srv_nbuf_per_group; i++) {
-                /* NB: another thread might be doing this as well, we need to
-                 * make sure that it wouldn't over-allocate, see LU-1212. */
+                /* NB: another thread might have recycled enough rqbds, we
+                * need to make sure it wouldn't over-allocate, see LU-1212. */
                if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
                        break;
 
@@ -141,11 +157,19 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
                 }
        }
 
+       spin_lock(&svcpt->scp_lock);
+
+       LASSERT(svcpt->scp_rqbd_allocating == 1);
+       svcpt->scp_rqbd_allocating--;
+
+       spin_unlock(&svcpt->scp_lock);
+
        CDEBUG(D_RPCTRACE,
               "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
               svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted,
               svcpt->scp_nrqbds_total, rc);
 
+ try_post:
        if (post && rc == 0)
                rc = ptlrpc_server_post_idle_rqbds(svcpt);