Whamcloud - gitweb
LU-7734 lnet: automatic sizing of router pinger buffers 24/22024/7
author Olaf Weber <olaf@sgi.com>
Fri, 27 Jan 2017 15:16:34 +0000 (16:16 +0100)
committer Olaf Weber <olaf@sgi.com>
Fri, 27 Jan 2017 15:41:18 +0000 (16:41 +0100)
The router pinger uses fixed-size buffers to receive the data
returned by a ping. When a router has more than 16 interfaces
(including loopback) this means the data for some interfaces
is dropped.

Detect this situation, and track the number of remote NIs in
the lnet_rc_data_t structure.  lnet_create_rc_data_locked()
becomes lnet_update_rc_data_locked(), and is modified to replace
an existing ping buffer if one is present. It is now also
called by lnet_ping_router_locked() when the existing ping
buffer is too small.

Test-Parameters: trivial
Signed-off-by: Olaf Weber <olaf@sgi.com>
Change-Id: I9474e2e162d2facdf571fe207dea254dd3f8e469

lnet/include/lnet/lib-types.h
lnet/lnet/router.c

index b24c76c..7632048 100644 (file)
@@ -449,14 +449,13 @@ struct lnet_ping_buffer {
 
 
 /* router checker data, per router */
-#define LNET_MAX_RTR_NIS   LNET_MIN_INTERFACES
-#define LNET_RTR_PINGINFO_SIZE LNET_PING_INFO_SIZE(LNET_MAX_RTR_NIS)
 typedef struct {
        /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */
        struct list_head        rcd_list;
        lnet_handle_md_t        rcd_mdh;        /* ping buffer MD */
        struct lnet_peer_ni     *rcd_gateway;   /* reference to gateway */
        struct lnet_ping_buffer *rcd_pingbuffer;/* ping buffer */
+       int                     rcd_nnis;       /* desired size of buffer */
 } lnet_rc_data_t;
 
 struct lnet_peer_ni {
index 0ab2a60..44b690f 100644 (file)
@@ -717,8 +717,11 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd)
 
        /* Determine the number of NIs for which there is data. */
        nnis = pbuf->pb_info.pi_nnis;
-       if (pbuf->pb_nnis < nnis)
+       if (pbuf->pb_nnis < nnis) {
+               if (rcd->rcd_nnis < nnis)
+                       rcd->rcd_nnis = nnis;
                nnis = pbuf->pb_nnis;
+       }
 
        list_for_each_entry(rte, &gw->lpni_routes, lr_gwlist) {
                int     down = 0;
@@ -732,7 +735,7 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd)
                }
 
                for (i = 0; i < nnis; i++) {
-                       lnet_ni_status_t *stat = &pbuf->pb_info.pi_ni[i];
+                       struct lnet_ni_status *stat = &pbuf->pb_info.pi_ni[i];
                        lnet_nid_t       nid = stat->ns_nid;
 
                        if (nid == LNET_NID_ANY) {
@@ -946,27 +949,46 @@ lnet_destroy_rc_data(lnet_rc_data_t *rcd)
 }
 
 static lnet_rc_data_t *
-lnet_create_rc_data_locked(struct lnet_peer_ni *gateway)
+lnet_update_rc_data_locked(struct lnet_peer_ni *gateway)
 {
-       lnet_rc_data_t          *rcd = NULL;
-       struct lnet_ping_buffer *pbuf;
-       int                     rc;
-       int                     i;
+       lnet_handle_md_t mdh;
+       lnet_rc_data_t *rcd;
+       struct lnet_ping_buffer *pbuf = NULL;
+       int nnis = LNET_MIN_INTERFACES;
+       int rc;
+       int i;
+
+       rcd = gateway->lpni_rcd;
+       if (rcd) {
+               nnis = rcd->rcd_nnis;
+               mdh = rcd->rcd_mdh;
+               LNetInvalidateHandle(&rcd->rcd_mdh);
+               pbuf = rcd->rcd_pingbuffer;
+               rcd->rcd_pingbuffer = NULL;
+       } else {
+               LNetInvalidateHandle(&mdh);
+       }
 
        lnet_net_unlock(gateway->lpni_cpt);
 
-       LIBCFS_ALLOC(rcd, sizeof(*rcd));
-       if (rcd == NULL)
-               goto out;
+       if (rcd) {
+               LNetMDUnlink(mdh);
+               lnet_ping_buffer_decref(pbuf);
+       } else {
+               LIBCFS_ALLOC(rcd, sizeof(*rcd));
+               if (rcd == NULL)
+                       goto out;
 
-       LNetInvalidateHandle(&rcd->rcd_mdh);
-       INIT_LIST_HEAD(&rcd->rcd_list);
+               LNetInvalidateHandle(&rcd->rcd_mdh);
+               INIT_LIST_HEAD(&rcd->rcd_list);
+               rcd->rcd_nnis = nnis;
+       }
 
-       pbuf = lnet_ping_buffer_alloc(LNET_MAX_RTR_NIS, GFP_NOFS);
+       pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
        if (pbuf == NULL)
                goto out;
 
-       for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
+       for (i = 0; i < nnis; i++) {
                pbuf->pb_info.pi_ni[i].ns_nid = LNET_NID_ANY;
                pbuf->pb_info.pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
        }
@@ -975,7 +997,7 @@ lnet_create_rc_data_locked(struct lnet_peer_ni *gateway)
        LASSERT(!LNetHandleIsInvalid(the_lnet.ln_rc_eqh));
        rc = LNetMDBind((lnet_md_t){.start     = &pbuf->pb_info,
                                    .user_ptr  = rcd,
-                                   .length    = LNET_RTR_PINGINFO_SIZE,
+                                   .length    = LNET_PING_INFO_SIZE(nnis),
                                    .threshold = LNET_MD_THRESH_INF,
                                    .options   = LNET_MD_TRUNCATE,
                                    .eq_handle = the_lnet.ln_rc_eqh},
@@ -983,33 +1005,39 @@ lnet_create_rc_data_locked(struct lnet_peer_ni *gateway)
                        &rcd->rcd_mdh);
        if (rc < 0) {
                CERROR("Can't bind MD: %d\n", rc);
-               goto out;
+               goto out_ping_buffer_decref;
        }
        LASSERT(rc == 0);
 
        lnet_net_lock(gateway->lpni_cpt);
-       /* router table changed or someone has created rcd for this gateway */
-       if (!lnet_isrouter(gateway) || gateway->lpni_rcd != NULL) {
-               lnet_net_unlock(gateway->lpni_cpt);
-               goto out;
+       /* Check if this is still a router. */
+       if (!lnet_isrouter(gateway))
+               goto out_unlock;
+       /* Check if someone else installed router data. */
+       if (gateway->lpni_rcd && gateway->lpni_rcd != rcd)
+               goto out_unlock;
+
+       /* Install and/or update the router data. */
+       if (!gateway->lpni_rcd) {
+               lnet_peer_ni_addref_locked(gateway);
+               rcd->rcd_gateway = gateway;
+               gateway->lpni_rcd = rcd;
        }
-
-       lnet_peer_ni_addref_locked(gateway);
-       rcd->rcd_gateway = gateway;
-       gateway->lpni_rcd = rcd;
+       rcd->rcd_pingbuffer = pbuf;
+       rcd->rcd_mdh = mdh;
        gateway->lpni_ping_notsent = 0;
 
        return rcd;
 
+out_unlock:
+       lnet_net_unlock(gateway->lpni_cpt);
+       rc = LNetMDUnlink(mdh);
+       LASSERT(rc == 0);
+out_ping_buffer_decref:
+       lnet_ping_buffer_decref(pbuf);
 out:
-       if (rcd != NULL) {
-               if (!LNetHandleIsInvalid(rcd->rcd_mdh)) {
-                       rc = LNetMDUnlink(rcd->rcd_mdh);
-                       LASSERT(rc == 0);
-               }
+       if (rcd != gateway->lpni_rcd)
                lnet_destroy_rc_data(rcd);
-       }
-
        lnet_net_lock(gateway->lpni_cpt);
        return gateway->lpni_rcd;
 }
@@ -1052,8 +1080,9 @@ lnet_ping_router_locked (struct lnet_peer_ni *rtr)
                return;
        }
 
-       rcd = rtr->lpni_rcd != NULL ?
-             rtr->lpni_rcd : lnet_create_rc_data_locked(rtr);
+       rcd = rtr->lpni_rcd;
+       if (!rcd || rcd->rcd_nnis > rcd->rcd_pingbuffer->pb_nnis)
+               rcd = lnet_update_rc_data_locked(rtr);
 
        if (rcd == NULL)
                return;