Whamcloud - gitweb
LU-12537 lnet: Sync the start of discovery and monitor threads
[fs/lustre-release.git] / lnet / lnet / lib-move.c
index 7067164..c1d4e84 100644 (file)
@@ -796,12 +796,32 @@ lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg)
        return rc;
 }
 
+/*
+ * Returns true once the configured peer timeout has elapsed since @lpni
+ * was last recorded alive (as of @now), false while inside that window.
+ */
+static bool
+lnet_is_peer_deadline_passed(struct lnet_peer_ni *lpni, time64_t now)
+{
+       time64_t expiry;
+
+       /*
+        * the peer_ni is presumed alive for lct_peer_timeout seconds
+        * after lpni_last_alive; the deadline is passed once now reaches it
+        */
+       expiry = lpni->lpni_last_alive +
+                lpni->lpni_net->net_tunables.lct_peer_timeout;
+       return expiry <= now;
+}
+
 /* NB: returns 1 when alive, 0 when dead, negative when error;
  *     may drop the lnet_net_lock */
 static int
 lnet_peer_alive_locked(struct lnet_ni *ni, struct lnet_peer_ni *lpni,
                       struct lnet_msg *msg)
 {
+       time64_t now = ktime_get_seconds();
+
        if (!lnet_peer_aliveness_enabled(lpni))
                return -ENODEV;
 
@@ -821,6 +841,9 @@ lnet_peer_alive_locked(struct lnet_ni *ni, struct lnet_peer_ni *lpni,
            msg->msg_type == LNET_MSG_REPLY)
                return 1;
 
+       if (!lnet_is_peer_deadline_passed(lpni, now))
+               return true;
+
        return lnet_is_peer_ni_alive(lpni);
 }
 
@@ -2010,21 +2033,59 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
 {
        int rc;
        struct lnet_peer *gw;
+       struct lnet_peer *lp;
+       struct lnet_peer_net *lpn;
+       struct lnet_peer_net *best_lpn = NULL;
+       struct lnet_remotenet *rnet;
        struct lnet_route *best_route;
        struct lnet_route *last_route;
        struct lnet_peer_ni *lpni = NULL;
+       struct lnet_peer_ni *gwni = NULL;
        lnet_nid_t src_nid = sd->sd_src_nid;
 
-       best_route = lnet_find_route_locked(NULL, LNET_NIDNET(dst_nid),
+       /* we've already looked up the initial lpni using dst_nid */
+       lpni = sd->sd_best_lpni;
+       /* the peer tree must be in existence */
+       LASSERT(lpni && lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer);
+       lp = lpni->lpni_peer_net->lpn_peer;
+
+       list_for_each_entry(lpn, &lp->lp_peer_nets, lpn_peer_nets) {
+               /* is this remote network reachable?  */
+               rnet = lnet_find_rnet_locked(lpn->lpn_net_id);
+               if (!rnet)
+                       continue;
+
+               if (!best_lpn)
+                       best_lpn = lpn;
+
+               if (best_lpn->lpn_seq <= lpn->lpn_seq)
+                       continue;
+
+               best_lpn = lpn;
+       }
+
+       if (!best_lpn) {
+               CERROR("peer %s has no available nets \n",
+                      libcfs_nid2str(sd->sd_dst_nid));
+               return -EHOSTUNREACH;
+       }
+
+       sd->sd_best_lpni = lnet_find_best_lpni_on_net(sd, lp, best_lpn->lpn_net_id);
+       if (!sd->sd_best_lpni) {
+               CERROR("peer %s down\n", libcfs_nid2str(sd->sd_dst_nid));
+               return -EHOSTUNREACH;
+       }
+
+       best_route = lnet_find_route_locked(NULL, best_lpn->lpn_net_id,
                                            sd->sd_rtr_nid, &last_route,
-                                           &lpni);
+                                           &gwni);
        if (!best_route) {
                CERROR("no route to %s from %s\n",
                       libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid));
                return -EHOSTUNREACH;
        }
 
-       if (!lpni) {
+       if (!gwni) {
                CERROR("Internal Error. Route expected to %s from %s\n",
                        libcfs_nid2str(dst_nid),
                        libcfs_nid2str(src_nid));
@@ -2032,7 +2093,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
        }
 
        gw = best_route->lr_gateway;
-       LASSERT(gw == lpni->lpni_peer_net->lpn_peer);
+       LASSERT(gw == gwni->lpni_peer_net->lpn_peer);
 
        /*
         * Discover this gateway if it hasn't already been discovered.
@@ -2040,7 +2101,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
         * completed
         */
        sd->sd_msg->msg_src_nid_param = sd->sd_src_nid;
-       rc = lnet_initiate_peer_discovery(lpni, sd->sd_msg, sd->sd_rtr_nid,
+       rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_rtr_nid,
                                          sd->sd_cpt);
        if (rc)
                return rc;
@@ -2060,15 +2121,16 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                return -EFAULT;
        }
 
-       *gw_lpni = lpni;
+       *gw_lpni = gwni;
        *gw_peer = gw;
 
        /*
-        * increment the route sequence number since now we're sure we're
-        * going to use it
+        * increment the sequence numbers since now we're sure we're
+        * going to use this path
         */
        LASSERT(best_route && last_route);
        best_route->lr_seq = last_route->lr_seq + 1;
+       best_lpn->lpn_seq++;
 
        return 0;
 }
@@ -2439,11 +2501,11 @@ lnet_handle_any_mr_dst(struct lnet_send_data *sd)
                return rc;
 
        /*
-        * TODO; One possible enhancement is to run the selection
-        * algorithm on the peer. However for remote peers the credits are
-        * not decremented, so we'll be basically going over the peer NIs
-        * in round robin. An MR router will run the selection algorithm
-        * on the next-hop interfaces.
+        * Now that we must route to the destination, we must consider the
+        * MR case, where the destination has multiple interfaces, some of
+        * which we can route to and others we cannot. For this reason we
+        * need to select a destination interface which we can route to,
+        * and if there are multiple, we need to round robin.
         */
        rc = lnet_handle_find_routed_path(sd, sd->sd_dst_nid, &gw_lpni,
                                          &gw_peer);
@@ -2702,8 +2764,13 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
        LASSERT(!msg->msg_tx_committed);
 
        rc = lnet_select_pathway(src_nid, dst_nid, msg, rtr_nid);
-       if (rc < 0)
+       if (rc < 0) {
+               if (rc == -EHOSTUNREACH)
+                       msg->msg_health_status = LNET_MSG_STATUS_REMOTE_ERROR;
+               else
+                       msg->msg_health_status = LNET_MSG_STATUS_LOCAL_ERROR;
                return rc;
+       }
 
        if (rc == LNET_CREDIT_OK)
                lnet_ni_send(msg->msg_txni, msg);
@@ -2817,8 +2884,9 @@ lnet_finalize_expired_responses(bool force)
 
                                nid = rspt->rspt_next_hop_nid;
 
-                               CNETERR("Response timed out: md = %p: nid = %s\n",
-                                       md, libcfs_nid2str(nid));
+                               CDEBUG(D_NET,
+                                      "Response timeout: md = %p: nid = %s\n",
+                                      md, libcfs_nid2str(nid));
                                LNetMDUnlink(rspt->rspt_mdh);
                                lnet_rspt_free(rspt, i);
 
@@ -3378,6 +3446,7 @@ lnet_monitor_thread(void *arg)
        int interval;
        time64_t now;
 
+       wait_for_completion(&the_lnet.ln_started);
        /*
         * The monitor thread takes care of the following:
         *  1. Checks the aliveness of routers
@@ -4376,6 +4445,10 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,
                        return 0;
                goto drop;
        }
+
+       if (the_lnet.ln_routing)
+               lpni->lpni_last_alive = ktime_get_seconds();
+
        msg->msg_rxpeer = lpni;
        msg->msg_rxni = ni;
        lnet_ni_addref_locked(ni, cpt);