Whamcloud - gitweb
LU-9121 lnet: Preferred gateway selection
author Amir Shehata <ashehata@whamcloud.com>
Wed, 20 Feb 2019 02:13:40 +0000 (18:13 -0800)
committer Amir Shehata <ashehata@whamcloud.com>
Mon, 22 Feb 2021 17:10:56 +0000 (09:10 -0800)
Add mechanism for managing preferred gateway lists.
When selecting a route through a gateway, if there exists
a preferred gateway list for the destination peer, then choose
the preferred gateway. If there are multiple preferred
gateways, select among them using, in order of decreasing
importance: route priority, number of hops, number of available
tx credits on the associated lpni, and route sequence counters.
If there are no preferred routes, select the best route
available using the same criteria.

Test-Parameters: trivial testlist=lnet-selftest,sanity-lnet
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: If46920cf7b79aa8b211d6c0a35995edce9b1699a
Reviewed-on: https://review.whamcloud.com/34353
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/lib-move.c
lnet/lnet/peer.c

index d245c11..eb5dc88 100644 (file)
@@ -878,6 +878,11 @@ void lnet_debug_peer(lnet_nid_t nid);
 struct lnet_peer_net *lnet_peer_get_net_locked(struct lnet_peer *peer,
                                               __u32 net_id);
 bool lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid);
 struct lnet_peer_net *lnet_peer_get_net_locked(struct lnet_peer *peer,
                                               __u32 net_id);
 bool lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid);
+int lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
+void lnet_peer_clr_pref_nids(struct lnet_peer_ni *lpni);
+/* preferred gateway (router) list of a peer NI: query, clear, add */
+bool lnet_peer_is_pref_rtr_locked(struct lnet_peer_ni *lpni, lnet_nid_t gw_nid);
+void lnet_peer_clr_pref_rtrs(struct lnet_peer_ni *lpni);
+int lnet_peer_add_pref_rtr(struct lnet_peer_ni *lpni, lnet_nid_t gw_nid);
 int lnet_peer_ni_set_non_mr_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
 int lnet_add_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid, bool mr);
 int lnet_del_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid);
 int lnet_peer_ni_set_non_mr_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
 int lnet_add_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid, bool mr);
 int lnet_del_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid);
index c1e6483..88ba642 100644 (file)
@@ -1283,24 +1283,6 @@ routing_off:
        }
 }
 
        }
 }
 
-static int
-lnet_compare_gw_lpnis(struct lnet_peer_ni *p1, struct lnet_peer_ni *p2)
-{
-       if (p1->lpni_txqnob < p2->lpni_txqnob)
-               return 1;
-
-       if (p1->lpni_txqnob > p2->lpni_txqnob)
-               return -1;
-
-       if (p1->lpni_txcredits > p2->lpni_txcredits)
-               return 1;
-
-       if (p1->lpni_txcredits < p2->lpni_txcredits)
-               return -1;
-
-       return 0;
-}
-
 static struct lnet_peer_ni *
 lnet_select_peer_ni(struct lnet_ni *best_ni, lnet_nid_t dst_nid,
                    struct lnet_peer *peer,
 static struct lnet_peer_ni *
 lnet_select_peer_ni(struct lnet_ni *best_ni, lnet_nid_t dst_nid,
                    struct lnet_peer *peer,
@@ -1437,6 +1419,24 @@ lnet_find_best_lpni(struct lnet_ni *lni, lnet_nid_t dst_nid,
        return NULL;
 }
 
        return NULL;
 }
 
+static int
+lnet_compare_gw_lpnis(struct lnet_peer_ni *lpni1, struct lnet_peer_ni *lpni2)
+{      /* Three-way comparison of two peer NIs on their tx state.
+        * lpni_txqnob decides first: returns 1 if lpni1's value is the
+        * smaller one, -1 if it is the larger one. Only when those are
+        * equal is lpni_txcredits consulted: returns 1 if lpni1 has more
+        * credits, -1 if it has fewer, and 0 when both fields match.
+        * NOTE(review): 1 presumably means "lpni1 is the better gateway
+        * NI" - confirm against the caller.
+        */
+       if (lpni1->lpni_txqnob < lpni2->lpni_txqnob)
+               return 1;
+
+       if (lpni1->lpni_txqnob > lpni2->lpni_txqnob)
+               return -1;
+
+       if (lpni1->lpni_txcredits > lpni2->lpni_txcredits)
+               return 1;
+
+       if (lpni1->lpni_txcredits < lpni2->lpni_txcredits)
+               return -1;
+
+       return 0;
+}
+
 /* Compare route priorities and hop counts */
 static int
 lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
 /* Compare route priorities and hop counts */
 static int
 lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
@@ -1461,6 +1461,7 @@ lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
 
 static struct lnet_route *
 lnet_find_route_locked(struct lnet_remotenet *rnet, __u32 src_net,
 
 static struct lnet_route *
 lnet_find_route_locked(struct lnet_remotenet *rnet, __u32 src_net,
+                      struct lnet_peer_ni *remote_lpni,
                       struct lnet_route **prev_route,
                       struct lnet_peer_ni **gwni)
 {
                       struct lnet_route **prev_route,
                       struct lnet_peer_ni **gwni)
 {
@@ -1469,6 +1470,8 @@ lnet_find_route_locked(struct lnet_remotenet *rnet, __u32 src_net,
        struct lnet_route *last_route;
        struct lnet_route *route;
        int rc;
        struct lnet_route *last_route;
        struct lnet_route *route;
        int rc;
+       bool best_rte_is_preferred = false;
+       lnet_nid_t gw_pnid;
 
        CDEBUG(D_NET, "Looking up a route to %s, from %s\n",
               libcfs_net2str(rnet->lrn_net), libcfs_net2str(src_net));
 
        CDEBUG(D_NET, "Looking up a route to %s, from %s\n",
               libcfs_net2str(rnet->lrn_net), libcfs_net2str(src_net));
@@ -1477,43 +1480,75 @@ lnet_find_route_locked(struct lnet_remotenet *rnet, __u32 src_net,
        list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
                if (!lnet_is_route_alive(route))
                        continue;
        list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
                if (!lnet_is_route_alive(route))
                        continue;
+               gw_pnid = route->lr_gateway->lp_primary_nid;
 
 
-               /*
-                * Restrict the selection of the router NI on the src_net
-                * provided. If the src_net is LNET_NID_ANY, then select
-                * the best interface available.
+               /* no protection on below fields, but it's harmless */
+               if (last_route && (last_route->lr_seq - route->lr_seq < 0))
+                       last_route = route;
+
+               /* if the best route found is in the preferred list then
+                * tag it as preferred and use it later on. But if we
+                * didn't find any routes which are on the preferred list
+                * then just use the best route possible.
                 */
                 */
-               if (!best_route) {
+               rc = lnet_peer_is_pref_rtr_locked(remote_lpni, gw_pnid);
+
+               if (!best_route || (rc && !best_rte_is_preferred)) {
+                       /* Restrict the selection of the router NI on the
+                        * src_net provided. If the src_net is LNET_NID_ANY,
+                        * then select the best interface available.
+                        */
                        lpni = lnet_find_best_lpni(NULL, LNET_NID_ANY,
                                                   route->lr_gateway,
                                                   src_net);
                        lpni = lnet_find_best_lpni(NULL, LNET_NID_ANY,
                                                   route->lr_gateway,
                                                   src_net);
-                       if (lpni) {
-                               best_route = last_route = route;
-                               best_gw_ni = lpni;
-                       } else {
-                               CDEBUG(D_NET, "Gateway %s does not have a peer NI on net %s\n",
-                                      libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+                       if (!lpni) {
+                               CDEBUG(D_NET,
+                                      "Gateway %s does not have a peer NI on net %s\n",
+                                      libcfs_nid2str(gw_pnid),
                                       libcfs_net2str(src_net));
                                       libcfs_net2str(src_net));
+                               continue;
                        }
                        }
+               }
 
 
+               if (rc && !best_rte_is_preferred) {
+                       /* This is the first preferred route we found,
+                        * so it beats any route found previously
+                        */
+                       best_route = route;
+                       if (!last_route)
+                               last_route = route;
+                       best_gw_ni = lpni;
+                       best_rte_is_preferred = true;
+                       CDEBUG(D_NET, "preferred gw = %s\n",
+                              libcfs_nid2str(gw_pnid));
+                       continue;
+               } else if ((!rc) && best_rte_is_preferred)
+                       /* The best route we found so far is in the preferred
+                        * list, so it beats any non-preferred route
+                        */
                        continue;
                        continue;
-               }
 
 
-               /* no protection on below fields, but it's harmless */
-               if (last_route->lr_seq - route->lr_seq < 0)
-                       last_route = route;
+               if (!best_route) {
+                       best_route = last_route = route;
+                       best_gw_ni = lpni;
+                       continue;
+               }
 
                rc = lnet_compare_routes(route, best_route);
                if (rc == -1)
                        continue;
 
 
                rc = lnet_compare_routes(route, best_route);
                if (rc == -1)
                        continue;
 
+               /* Restrict the selection of the router NI on the
+                * src_net provided. If the src_net is LNET_NID_ANY,
+                * then select the best interface available.
+                */
                lpni = lnet_find_best_lpni(NULL, LNET_NID_ANY,
                                           route->lr_gateway,
                                           src_net);
                lpni = lnet_find_best_lpni(NULL, LNET_NID_ANY,
                                           route->lr_gateway,
                                           src_net);
-               /* restrict the lpni on the src_net if specified */
                if (!lpni) {
                if (!lpni) {
-                       CDEBUG(D_NET, "Gateway %s does not have a peer NI on net %s\n",
-                              libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+                       CDEBUG(D_NET,
+                              "Gateway %s does not have a peer NI on net %s\n",
+                              libcfs_nid2str(gw_pnid),
                               libcfs_net2str(src_net));
                        continue;
                }
                               libcfs_net2str(src_net));
                        continue;
                }
@@ -2009,6 +2044,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
        lnet_nid_t src_nid = (sd->sd_src_nid != LNET_NID_ANY) ? sd->sd_src_nid :
                (sd->sd_best_ni != NULL) ? sd->sd_best_ni->ni_nid :
                LNET_NID_ANY;
        lnet_nid_t src_nid = (sd->sd_src_nid != LNET_NID_ANY) ? sd->sd_src_nid :
                (sd->sd_best_ni != NULL) ? sd->sd_best_ni->ni_nid :
                LNET_NID_ANY;
+       int best_lpn_healthv = 0;
+       __u32 best_lpn_sel_prio = LNET_MAX_SELECTION_PRIORITY;
 
        CDEBUG(D_NET, "using src nid %s for route restriction\n",
               libcfs_nid2str(src_nid));
 
        CDEBUG(D_NET, "using src nid %s for route restriction\n",
               libcfs_nid2str(src_nid));
@@ -2064,9 +2101,22 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                                        best_rnet = rnet;
                                }
 
                                        best_rnet = rnet;
                                }
 
-                               if (best_lpn->lpn_seq <= lpn->lpn_seq)
+                               /* select the preferred peer net */
+                               if (best_lpn_healthv > lpn->lpn_healthv)
                                        continue;
                                        continue;
+                               else if (best_lpn_healthv < lpn->lpn_healthv)
+                                       goto use_lpn;
 
 
+                               if (best_lpn_sel_prio < lpn->lpn_sel_priority)
+                                       continue;
+                               else if (best_lpn_sel_prio > lpn->lpn_sel_priority)
+                                       goto use_lpn;
+
+                               if (best_lpn->lpn_seq <= lpn->lpn_seq)
+                                       continue;
+use_lpn:
+                               best_lpn_healthv = lpn->lpn_healthv;
+                               best_lpn_sel_prio = lpn->lpn_sel_priority;
                                best_lpn = lpn;
                                best_rnet = rnet;
                        }
                                best_lpn = lpn;
                                best_rnet = rnet;
                        }
@@ -2109,6 +2159,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                 */
                best_route = lnet_find_route_locked(best_rnet,
                                                    LNET_NIDNET(src_nid),
                 */
                best_route = lnet_find_route_locked(best_rnet,
                                                    LNET_NIDNET(src_nid),
+                                                   sd->sd_best_lpni,
                                                    &last_route, &gwni);
 
                if (!best_route) {
                                                    &last_route, &gwni);
 
                if (!best_route) {
index 826589f..5f6b3ad 100644 (file)
@@ -898,6 +898,94 @@ lnet_push_update_to_peers(int force)
        wake_up(&the_lnet.ln_dc_waitq);
 }
 
        wake_up(&the_lnet.ln_dc_waitq);
 }
 
+/* find the NID in the preferred gateways for the remote peer
+ *
+ * lpni is the remote peer NI whose lpni_rtr_pref_nids list is searched,
+ * gw_nid is the gateway NID to look for. The function takes no lock
+ * itself.
+ * NOTE(review): the _locked suffix suggests the caller already holds the
+ * LNet net lock - confirm against the callers.
+ *
+ * return:
+ *     false: list is not empty and NID is not preferred
+ *     false: list is empty
+ *     true: nid is found in the list
+ */
+bool
+lnet_peer_is_pref_rtr_locked(struct lnet_peer_ni *lpni,
+                            lnet_nid_t gw_nid)
+{
+       struct lnet_nid_list *ne;
+
+       CDEBUG(D_NET, "%s: rtr pref empty: %d\n",
+              libcfs_nid2str(lpni->lpni_nid),
+              list_empty(&lpni->lpni_rtr_pref_nids));
+
+       /* no preferred gateways configured: nothing can match */
+       if (list_empty(&lpni->lpni_rtr_pref_nids))
+               return false;
+
+       /* iterate through all the preferred NIDs and see if any of them
+        * matches the provided gw_nid
+        */
+       list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
+               CDEBUG(D_NET, "Comparing pref %s with gw %s\n",
+                      libcfs_nid2str(ne->nl_nid),
+                      libcfs_nid2str(gw_nid));
+               if (ne->nl_nid == gw_nid)
+                       return true;
+       }
+
+       return false;
+}
+
+void
+lnet_peer_clr_pref_rtrs(struct lnet_peer_ni *lpni)
+{      /* Empty lpni's preferred-router list (lpni_rtr_pref_nids) and free
+        * every entry that was on it.
+        */
+       struct list_head zombies;
+       struct lnet_nid_list *ne;
+       struct lnet_nid_list *tmp;
+       int cpt = lpni->lpni_cpt;
+
+       INIT_LIST_HEAD(&zombies);
+
+       lnet_net_lock(cpt);     /* only the list hand-off runs locked */
+       list_splice_init(&lpni->lpni_rtr_pref_nids, &zombies);
+       lnet_net_unlock(cpt);   /* entries now sit on the local zombies
+                                * list and are freed below without the
+                                * net lock held
+                                */
+
+       list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
+               list_del(&ne->nl_list);
+               LIBCFS_FREE(ne, sizeof(*ne));
+       }
+}
+
+int
+lnet_peer_add_pref_rtr(struct lnet_peer_ni *lpni,
+                      lnet_nid_t gw_nid)
+{      /* Record gw_nid as a preferred router for lpni by adding an entry
+        * to lpni->lpni_rtr_pref_nids.
+        * Returns 0 on success, -EEXIST if gw_nid is already on the list
+        * and -ENOMEM if the new entry cannot be allocated.
+        */
+       int cpt = lpni->lpni_cpt;
+       struct lnet_nid_list *ne = NULL;
+
+       /* This function is called with api_mutex held. When the api_mutex
+        * is held the list can not be modified, as it is only modified as
+        * a result of applying a UDSP and that happens under api_mutex
+        * lock. That is why the duplicate scan below runs before the net
+        * lock is taken.
+        */
+       __must_hold(&the_lnet.ln_api_mutex);
+
+       list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
+               if (ne->nl_nid == gw_nid)
+                       return -EEXIST;
+       }
+
+       LIBCFS_CPT_ALLOC(ne, lnet_cpt_table(), cpt, sizeof(*ne));
+       if (!ne)
+               return -ENOMEM;
+
+       ne->nl_nid = gw_nid;
+
+       /* Lock the cpt to protect against addition and checks in the
+        * selection algorithm
+        */
+       lnet_net_lock(cpt);
+       list_add(&ne->nl_list, &lpni->lpni_rtr_pref_nids);
+       lnet_net_unlock(cpt);
+
+       return 0;
+}
+
 /*
  * Test whether a ni is a preferred ni for this peer_ni, e.g, whether
  * this is a preferred point-to-point path. Call with lnet_net_lock in
 /*
  * Test whether a ni is a preferred ni for this peer_ni, e.g, whether
  * this is a preferred point-to-point path. Call with lnet_net_lock in
@@ -1129,6 +1217,29 @@ out:
        return rc;
 }
 
        return rc;
 }
 
+void
+lnet_peer_clr_pref_nids(struct lnet_peer_ni *lpni)
+{      /* Reset lpni to having no preferred NIDs (lpni_pref_nnids = 0) and
+        * free any list entries that were spliced off lpni_pref.nids.
+        */
+       struct list_head zombies;
+       struct lnet_nid_list *ne;
+       struct lnet_nid_list *tmp;
+
+       INIT_LIST_HEAD(&zombies);
+
+       lnet_net_lock(LNET_LOCK_EX);
+       if (lpni->lpni_pref_nnids == 1) /* one NID: only .nid is reset */
+               lpni->lpni_pref.nid = LNET_NID_ANY;
+       else if (lpni->lpni_pref_nnids > 1) /* several: detach .nids list */
+               list_splice_init(&lpni->lpni_pref.nids, &zombies);
+       lpni->lpni_pref_nnids = 0;
+       lnet_net_unlock(LNET_LOCK_EX);  /* detached entries are freed
+                                        * below, after the lock is
+                                        * dropped
+                                        */
+
+       list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
+               list_del_init(&ne->nl_list);
+               LIBCFS_FREE(ne, sizeof(*ne));
+       }
+}
+
 lnet_nid_t
 lnet_peer_primary_nid_locked(lnet_nid_t nid)
 {
 lnet_nid_t
 lnet_peer_primary_nid_locked(lnet_nid_t nid)
 {