Whamcloud - gitweb
LU-2084 lnet: don't retry allocating router buffers 74/45174/4
author Andreas Dilger <adilger@whamcloud.com>
Sat, 9 Oct 2021 01:20:49 +0000 (19:20 -0600)
committer Oleg Drokin <green@whamcloud.com>
Wed, 27 Oct 2021 00:35:19 +0000 (00:35 +0000)
Don't loop indefinitely trying to allocate router buffer pools if
the number of requested buffers is too large for the system.

Test-Parameters: trivial
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Ic0f2ccf0f7b38dfa254e46e268b27092342efdb5
Reviewed-on: https://review.whamcloud.com/45174
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/router.c

index f46627e..d7016a1 100644 (file)
@@ -1300,8 +1300,8 @@ lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt)
        rb->rb_pool = rbp;
 
        for (i = 0; i < npages; i++) {
-               page = cfs_page_cpt_alloc(lnet_cpt_table(), cpt,
-                                         GFP_KERNEL | __GFP_ZERO);
+               page = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_KERNEL |
+                                         __GFP_ZERO | __GFP_NORETRY);
                if (page == NULL) {
                        while (--i >= 0)
                                __free_page(rb->rb_kiov[i].bv_page);
@@ -1382,8 +1382,8 @@ lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
        while (num_rb-- > 0) {
                rb = lnet_new_rtrbuf(rbp, cpt);
                if (rb == NULL) {
-                       CERROR("Failed to allocate %d route bufs of %d pages\n",
-                              nbufs, npages);
+                       CERROR("lnet: error allocating %ux%u page router buffers on CPT %u: rc = %d\n",
+                              nbufs, npages, cpt, -ENOMEM);
 
                        lnet_net_lock(cpt);
                        rbp->rbp_req_nbuffers = old_req_nbufs;
@@ -1531,9 +1531,11 @@ lnet_rtrpools_alloc(int im_a_router)
        } else if (!strcmp(forwarding, "enabled")) {
                /* explicitly enabled */
        } else {
-               LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either "
-                                  "'enabled' or 'disabled'\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               LCONSOLE_ERROR_MSG(0x10b,
+                                  "lnet: forwarding='%s' not set to either 'enabled' or 'disabled': rc = %d\n",
+                                  forwarding, rc);
+               return rc;
        }
 
        nrb_tiny = lnet_nrb_tiny_calculate();
@@ -1552,30 +1554,32 @@ lnet_rtrpools_alloc(int im_a_router)
                                                LNET_NRBPOOLS *
                                                sizeof(struct lnet_rtrbufpool));
        if (the_lnet.ln_rtrpools == NULL) {
+               rc = -ENOMEM;
                LCONSOLE_ERROR_MSG(0x10c,
-                                  "Failed to initialize router buffe pool\n");
-               return -ENOMEM;
+                       "lnet: error allocating router buffer pool: rc = %d\n",
+                       rc);
+               return rc;
        }
 
        cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
                lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0);
                rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
                                              nrb_tiny, i);
-               if (rc != 0)
+               if (rc)
                        goto failed;
 
                lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX],
                                  LNET_NRB_SMALL_PAGES);
                rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
                                              nrb_small, i);
-               if (rc != 0)
+               if (rc)
                        goto failed;
 
                lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX],
                                  LNET_NRB_LARGE_PAGES);
                rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
                                              nrb_large, i);
-               if (rc != 0)
+               if (rc)
                        goto failed;
        }