Whamcloud - gitweb
LU-1212 ptlrpc: ptlrpc_grow_req_bufs is racy
authorLiang Zhen <liang@whamcloud.com>
Wed, 14 Mar 2012 04:41:08 +0000 (12:41 +0800)
committerOleg Drokin <green@whamcloud.com>
Fri, 16 Mar 2012 21:36:10 +0000 (17:36 -0400)
multiple ptlrpc service threads can enter ptlrpc_grow_req_bufs()
the same time if they found "low_water" in ptlrpc_check_rqbd_pool(),
each of these threads will allocate ptlrpc_service::srv_nbuf_per_group
request buffers and could consume all memory.

Signed-off-by: Liang Zhen <liang@whamcloud.com>
Change-Id: I83d6fe53a0f86691ae7e2afb3d75fb8677f58688
Reviewed-on: http://review.whamcloud.com/2308
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
lustre/ptlrpc/service.c

index 63576b4..530e23b 100644 (file)
@@ -121,25 +121,36 @@ int
 ptlrpc_grow_req_bufs(struct ptlrpc_service *svc)
 {
         struct ptlrpc_request_buffer_desc *rqbd;
+        int                                rc = 0;
         int                                i;
 
-        CDEBUG(D_RPCTRACE, "%s: allocate %d new %d-byte reqbufs (%d/%d left)\n",
-               svc->srv_name, svc->srv_nbuf_per_group, svc->srv_buf_size,
-               svc->srv_nrqbd_receiving, svc->srv_nbufs);
         for (i = 0; i < svc->srv_nbuf_per_group; i++) {
+                /* NB: another thread might be doing this as well, we need to
+                 * make sure that it wouldn't over-allocate, see LU-1212. */
+                if (svc->srv_nrqbd_receiving >= svc->srv_nbuf_per_group)
+                        break;
+
                 rqbd = ptlrpc_alloc_rqbd(svc);
 
                 if (rqbd == NULL) {
-                        CERROR ("%s: Can't allocate request buffer\n",
-                                svc->srv_name);
-                        return (-ENOMEM);
+                        CERROR("%s: Can't allocate request buffer\n",
+                               svc->srv_name);
+                        rc = -ENOMEM;
+                        break;
                 }
 
-                if (ptlrpc_server_post_idle_rqbds(svc) < 0)
-                        return (-EAGAIN);
+                if (ptlrpc_server_post_idle_rqbds(svc) < 0) {
+                        rc = -EAGAIN;
+                        break;
+                }
         }
 
-        return (0);
+        CDEBUG(D_RPCTRACE,
+               "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
+               svc->srv_name, i, svc->srv_buf_size,
+               svc->srv_nrqbd_receiving, svc->srv_nbufs, rc);
+
+        return rc;
 }
 
 /**
@@ -1957,7 +1968,7 @@ ptlrpc_check_rqbd_pool(struct ptlrpc_service *svc)
 {
         int avail = svc->srv_nrqbd_receiving;
         int low_water = test_req_buffer_pressure ? 0 :
-                        svc->srv_nbuf_per_group/2;
+                        svc->srv_nbuf_per_group / 2;
 
         /* NB I'm not locking; just looking. */