Whamcloud - gitweb
LU-14555 lnet: asym route inconsistency warning
[fs/lustre-release.git] / lnet / lnet / router.c
index c7a160b..28b64ff 100644 (file)
@@ -187,13 +187,13 @@ lnet_move_route(struct lnet_route *route, struct lnet_peer *lp,
 
        if (lp) {
                route = list_first_entry(l, struct lnet_route,
-                                       lr_list);
+                                        lr_list);
                route->lr_gateway = lp;
                lnet_add_route_to_rnet(rnet, route);
        } else {
                while (!list_empty(l) && !rt_list) {
                        route = list_first_entry(l, struct lnet_route,
-                                lr_list);
+                                                lr_list);
                        list_del(&route->lr_list);
                        LIBCFS_FREE(route, sizeof(*route));
                }
@@ -382,7 +382,8 @@ static inline void
 lnet_check_route_inconsistency(struct lnet_route *route)
 {
        if (!route->lr_single_hop &&
-           (route->lr_hops == 1 || route->lr_hops == LNET_UNDEFINED_HOPS)) {
+           (route->lr_hops == 1 || route->lr_hops == LNET_UNDEFINED_HOPS) &&
+           avoid_asym_router_failure) {
                CWARN("route %s->%s is detected to be multi-hop but hop count is set to %d\n",
                        libcfs_net2str(route->lr_net),
                        libcfs_nidstr(&route->lr_gateway->lp_primary_nid),
@@ -582,15 +583,12 @@ struct lnet_remotenet *
 lnet_find_rnet_locked(__u32 net)
 {
        struct lnet_remotenet *rnet;
-       struct list_head *tmp;
        struct list_head *rn_list;
 
        LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
 
        rn_list = lnet_net2rnethash(net);
-       list_for_each(tmp, rn_list) {
-               rnet = list_entry(tmp, struct lnet_remotenet, lrn_list);
-
+       list_for_each_entry(rnet, rn_list, lrn_list) {
                if (rnet->lrn_net == net)
                        return rnet;
        }
@@ -726,7 +724,7 @@ lnet_add_route(__u32 net, __u32 hops, struct lnet_nid *gateway,
         * lnet_nid2peerni_ex() grabs a ref on the lpni. We will need to
         * lose that once we're done
         */
-       lpni = lnet_nid2peerni_ex(gateway, LNET_LOCK_EX);
+       lpni = lnet_nid2peerni_ex(gateway);
        if (IS_ERR(lpni)) {
                lnet_net_unlock(LNET_LOCK_EX);
 
@@ -740,7 +738,9 @@ lnet_add_route(__u32 net, __u32 hops, struct lnet_nid *gateway,
                return rc;
        }
 
-       LASSERT(lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer);
+       LASSERT(lpni);
+       LASSERT(lpni->lpni_peer_net);
+       LASSERT(lpni->lpni_peer_net->lpn_peer);
        gw = lpni->lpni_peer_net->lpn_peer;
 
        route->lr_gateway = gw;
@@ -977,8 +977,6 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, lnet_nid_t *gateway,
        struct lnet_remotenet *rnet;
        struct list_head *rn_list;
        struct lnet_route *route;
-       struct list_head *e1;
-       struct list_head *e2;
        int cpt;
        int i;
 
@@ -986,13 +984,8 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, lnet_nid_t *gateway,
 
        for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
                rn_list = &the_lnet.ln_remote_nets_hash[i];
-               list_for_each(e1, rn_list) {
-                       rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
-                       list_for_each(e2, &rnet->lrn_routes) {
-                               route = list_entry(e2, struct lnet_route,
-                                                  lr_list);
-
+               list_for_each_entry(rnet, rn_list, lrn_list) {
+                       list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
                                if (idx-- == 0) {
                                        *net      = rnet->lrn_net;
                                        *gateway  = lnet_nid_to_nid4(&route->lr_nid);
@@ -1023,7 +1016,6 @@ static void
 lnet_wait_known_routerstate(void)
 {
        struct lnet_peer *rtr;
-       struct list_head *entry;
        int all_known;
 
        LASSERT(the_lnet.ln_mt_state == LNET_MT_STATE_RUNNING);
@@ -1032,10 +1024,7 @@ lnet_wait_known_routerstate(void)
                int cpt = lnet_net_lock_current();
 
                all_known = 1;
-               list_for_each(entry, &the_lnet.ln_routers) {
-                       rtr = list_entry(entry, struct lnet_peer,
-                                        lp_rtr_list);
-
+               list_for_each_entry(rtr, &the_lnet.ln_routers, lp_rtr_list) {
                        spin_lock(&rtr->lp_lock);
 
                        if ((rtr->lp_state & LNET_PEER_RTR_DISCOVERED) == 0) {
@@ -1081,7 +1070,7 @@ lnet_update_ni_status_locked(void)
 
        timeout = router_ping_timeout + alive_router_check_interval;
 
-       now = ktime_get_real_seconds();
+       now = ktime_get_seconds();
        list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
                if (net->net_lnd->lnd_type == LOLND)
                        continue;
@@ -1157,7 +1146,6 @@ lnet_check_routers(void)
        struct lnet_peer_net *first_lpn;
        struct lnet_peer_net *lpn;
        struct lnet_peer_ni *lpni;
-       struct list_head *entry;
        struct lnet_peer *rtr;
        bool push = false;
        bool needs_ping;
@@ -1172,10 +1160,7 @@ lnet_check_routers(void)
 rescan:
        version = the_lnet.ln_routers_version;
 
-       list_for_each(entry, &the_lnet.ln_routers) {
-               rtr = list_entry(entry, struct lnet_peer,
-                                lp_rtr_list);
-
+       list_for_each_entry(rtr, &the_lnet.ln_routers, lp_rtr_list) {
                /* If we're currently discovering the peer then don't
                 * issue another discovery
                 */
@@ -1235,8 +1220,7 @@ rescan:
                spin_unlock(&rtr->lp_lock);
 
                /* find the peer_ni associated with the primary NID */
-               lpni = lnet_peer_get_ni_locked(
-                       rtr, lnet_nid_to_nid4(&rtr->lp_primary_nid));
+               lpni = lnet_peer_ni_get_locked(rtr, &rtr->lp_primary_nid);
                if (!lpni) {
                        CDEBUG(D_NET, "Expected to find an lpni for %s, but non found\n",
                               libcfs_nidstr(&rtr->lp_primary_nid));
@@ -1344,7 +1328,7 @@ lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
 
        /* Free buffers on the free list. */
        while (!list_empty(&tmp)) {
-               rb = list_entry(tmp.next, struct lnet_rtrbuf, rb_list);
+               rb = list_first_entry(&tmp, struct lnet_rtrbuf, rb_list);
                list_del(&rb->rb_list);
                lnet_destroy_rtrbuf(rb, npages);
        }
@@ -1418,8 +1402,9 @@ lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
        return 0;
 
 failed:
-       while (!list_empty(&rb_list)) {
-               rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list);
+       while ((rb = list_first_entry_or_null(&rb_list,
+                                             struct lnet_rtrbuf,
+                                             rb_list)) != NULL) {
                list_del(&rb->rb_list);
                lnet_destroy_rtrbuf(rb, npages);
        }
@@ -1723,25 +1708,27 @@ lnet_notify_peer_down(struct lnet_ni *ni, struct lnet_nid *nid)
  * when: notificaiton time.
  */
 int
-lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
+lnet_notify(struct lnet_ni *ni, lnet_nid_t nid4, bool alive, bool reset,
            time64_t when)
 {
        struct lnet_peer_ni *lpni = NULL;
        struct lnet_route *route;
        struct lnet_peer *lp;
        time64_t now = ktime_get_seconds();
+       struct lnet_nid nid;
        int cpt;
 
+       lnet_nid4_to_nid(nid4, &nid);
        LASSERT(!in_interrupt());
 
        CDEBUG(D_NET, "%s notifying %s: %s\n",
               (ni == NULL) ? "userspace" : libcfs_nidstr(&ni->ni_nid),
-              libcfs_nid2str(nid), alive ? "up" : "down");
+              libcfs_nidstr(&nid), alive ? "up" : "down");
 
        if (ni != NULL &&
-           LNET_NID_NET(&ni->ni_nid) != LNET_NIDNET(nid)) {
+           LNET_NID_NET(&ni->ni_nid) != LNET_NID_NET(&nid)) {
                CWARN("Ignoring notification of %s %s by %s (different net)\n",
-                     libcfs_nid2str(nid), alive ? "birth" : "death",
+                     libcfs_nidstr(&nid), alive ? "birth" : "death",
                      libcfs_nidstr(&ni->ni_nid));
                return -EINVAL;
        }
@@ -1750,7 +1737,7 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
        if (when > now) {
                CWARN("Ignoring prediction from %s of %s %s %lld seconds in the future\n",
                        ni ? libcfs_nidstr(&ni->ni_nid) :  "userspace",
-                       libcfs_nid2str(nid), alive ? "up" : "down", when - now);
+                       libcfs_nidstr(&nid), alive ? "up" : "down", when - now);
                return -EINVAL;
        }
 
@@ -1768,11 +1755,11 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
                return -ESHUTDOWN;
        }
 
-       lpni = lnet_find_peer_ni_locked(nid);
+       lpni = lnet_peer_ni_find_locked(&nid);
        if (lpni == NULL) {
                /* nid not found */
                lnet_net_unlock(0);
-               CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
+               CDEBUG(D_NET, "%s not found\n", libcfs_nidstr(&nid));
                return 0;
        }