From: Amir Shehata Date: Wed, 6 Jul 2016 02:36:08 +0000 (-0700) Subject: LU-7734 lnet: Routing fixes part 2 X-Git-Tag: 2.9.53~47^2~13 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=b704f1b8bfa432c5e1a47048e2770e47721a0459;p=fs%2Flustre-release.git LU-7734 lnet: Routing fixes part 2 Fix lnet_select_pathway() to handle the routing cases correctly. The following general cases are handled: . Non-MR directly connected . Non-MR not directly connected . MR Directly connected . MR Not directly connected . No gateway . Gateway is non-mr . Gateway is mr Signed-off-by: Amir Shehata Change-Id: If2d16b797b94421e78a9f2a254a250a440f8b244 Reviewed-on: http://review.whamcloud.com/21167 --- diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index c98ec94..6a617d5 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -1325,6 +1325,7 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid, __u32 seq; int cpt, cpt2, rc; bool routing; + bool routing2; bool ni_is_pref; bool preferred; int best_credits; @@ -1348,6 +1349,7 @@ again: best_gw = NULL; local_net = NULL; routing = false; + routing2 = false; seq = lnet_get_dlc_seq_locked(); @@ -1382,7 +1384,7 @@ again: } /* - * STEP 1: first jab at determineing best_ni + * STEP 1: first jab at determining best_ni * if src_nid is explicitly specified, then best_ni is already * pre-determiend for us. Otherwise we need to select the best * one to use later on @@ -1396,18 +1398,122 @@ again: libcfs_nid2str(src_nid)); return -EINVAL; } + } + + if (msg->msg_type == LNET_MSG_REPLY || + msg->msg_type == LNET_MSG_ACK || + !peer->lp_multi_rail) { + /* + * for replies we want to respond on the same peer_ni we + * received the message on if possible. If not, then pick + * a peer_ni to send to + * + * if the peer is non-multi-rail then you want to send to + * the dst_nid provided as well. + * + * It is expected to find the lpni using dst_nid, since we + * created it earlier. + */ + best_lpni = lnet_find_peer_ni_locked(dst_nid); + if (best_lpni) + lnet_peer_ni_decref_locked(best_lpni); + + if (best_lpni && !lnet_get_net_locked(LNET_NIDNET(dst_nid))) { + /* + * this lpni is not on a local network so we need + * to route this reply. + */ + best_gw = lnet_find_route_locked(NULL, + best_lpni->lpni_nid, + rtr_nid); + if (best_gw) { + /* + * RULE: Each node considers only the next-hop + * + * We're going to route the message, so change the peer to + * the router. + */ + LASSERT(best_gw->lpni_peer_net); + LASSERT(best_gw->lpni_peer_net->lpn_peer); + peer = best_gw->lpni_peer_net->lpn_peer; + + /* + * if the router is not multi-rail then use the best_gw + * found to send the message to + */ + if (!peer->lp_multi_rail) + best_lpni = best_gw; + else + best_lpni = NULL; - if (best_ni->ni_net->net_id != LNET_NIDNET(dst_nid)) { + routing = true; + } else { + best_lpni = NULL; + } + } else if (!best_lpni) { lnet_net_unlock(cpt); - LCONSOLE_WARN("No route to %s via from %s\n", - libcfs_nid2str(dst_nid), - libcfs_nid2str(src_nid)); + CERROR("unable to send msg_type %d to " + "originating %s. Destination NID not in DB\n", + msg->msg_type, libcfs_nid2str(dst_nid)); return -EINVAL; } - goto pick_peer; } /* + * if the peer is not MR capable, then we should always send to it + * using the first NI in the NET we determined. + */ + if (!peer->lp_multi_rail) { + if (!best_lpni) { + lnet_net_unlock(cpt); + CERROR("no route to %s\n", + libcfs_nid2str(dst_nid)); + return -EHOSTUNREACH; + } + + /* best ni could be set because src_nid was provided */ + if (!best_ni) { + best_ni = lnet_net2ni_locked(best_lpni->lpni_net->net_id, cpt); + if (!best_ni) { + lnet_net_unlock(cpt); + CERROR("no path to %s from net %s\n", + libcfs_nid2str(best_lpni->lpni_nid), + libcfs_net2str(best_lpni->lpni_net->net_id)); + return -EHOSTUNREACH; + } + } + } + + if (best_ni == the_lnet.ln_loni) { + /* No send credit hassles with LOLND */ + lnet_ni_addref_locked(best_ni, cpt); + msg->msg_hdr.dest_nid = cpu_to_le64(best_ni->ni_nid); + if (!msg->msg_routing) + msg->msg_hdr.src_nid = cpu_to_le64(best_ni->ni_nid); + msg->msg_target.nid = best_ni->ni_nid; + lnet_msg_commit(msg, cpt); + msg->msg_txni = best_ni; + lnet_net_unlock(cpt); + + return LNET_CREDIT_OK; + } + + /* + * if we already found a best_ni because src_nid is specified and + * best_lpni because we are replying to a message then just send + * the message + */ + if (best_ni && best_lpni) + goto send; + + /* + * If we already found a best_ni because src_nid is specified then + * pick the peer then send the message + */ + if (best_ni) + goto pick_peer; + + /* * Decide whether we need to route to peer_ni. * Get the local net that I need to be on to be able to directly * send to that peer. @@ -1423,7 +1529,7 @@ again: continue; local_net = lnet_get_net_locked(peer_net->lpn_net_id); - if (!local_net) { + if (!local_net && !routing) { struct lnet_peer_ni *net_gw; /* * go through each peer_ni on that peer_net and @@ -1444,14 +1550,11 @@ again: if (!best_gw) { best_gw = net_gw; - best_lpni = lpni; } else { rc = lnet_compare_peers(net_gw, best_gw); - if (rc > 0) { + if (rc > 0) best_gw = net_gw; - best_lpni = lpni; - } } } @@ -1460,9 +1563,9 @@ again: local_net = lnet_get_net_locked (LNET_NIDNET(best_gw->lpni_nid)); - routing = true; + routing2 = true; } else { - routing = false; + routing2 = false; best_gw = NULL; } @@ -1523,12 +1626,17 @@ again: } } - /* - * if the peer is not MR capable, then we should always send to it - * using the first NI in the NET we determined. - */ - if (!peer->lp_multi_rail && local_net != NULL) - best_ni = lnet_net2ni_locked(local_net->net_id, cpt); + if (routing2) { + /* + * RULE: Each node considers only the next-hop + * + * We're going to route the message, so change the peer to + * the router. + */ + LASSERT(best_gw->lpni_peer_net); + LASSERT(best_gw->lpni_peer_net->lpn_peer); + peer = best_gw->lpni_peer_net->lpn_peer; + } if (!best_ni) { lnet_net_unlock(cpt); @@ -1544,42 +1652,11 @@ again: */ best_ni->ni_seq++; - if (routing) - goto send; - pick_peer: - if (best_ni == the_lnet.ln_loni) { - /* No send credit hassles with LOLND */ - lnet_ni_addref_locked(best_ni, cpt); - msg->msg_hdr.dest_nid = cpu_to_le64(best_ni->ni_nid); - if (!msg->msg_routing) - msg->msg_hdr.src_nid = cpu_to_le64(best_ni->ni_nid); - msg->msg_target.nid = best_ni->ni_nid; - lnet_msg_commit(msg, cpt); - msg->msg_txni = best_ni; - lnet_net_unlock(cpt); - - return LNET_CREDIT_OK; - } - - if (msg->msg_type == LNET_MSG_REPLY || - msg->msg_type == LNET_MSG_ACK) { - /* - * for replies we want to respond on the same peer_ni we - * received the message on if possible. If not, then pick - * a peer_ni to send to - */ - best_lpni = lnet_find_peer_ni_locked(dst_nid); - if (best_lpni) { - lnet_peer_ni_decref_locked(best_lpni); - goto send; - } else { - CDEBUG(D_NET, "unable to send msg_type %d to " - "originating %s\n", msg->msg_type, - libcfs_nid2str(dst_nid)); - } - } - + /* + * At this point the best_ni is on a local network on which + * the peer has a peer_ni as well + */ peer_net = lnet_peer_get_net_locked(peer, best_ni->ni_net->net_id); /* @@ -1609,13 +1686,16 @@ pick_peer: libcfs_nid2str(best_gw->lpni_nid), lnet_msgtyp2str(msg->msg_type), msg->msg_len); - best_lpni = lnet_find_peer_ni_locked(dst_nid); - LASSERT(best_lpni != NULL); - lnet_peer_ni_decref_locked(best_lpni); - - routing = true; - - goto send; + routing2 = true; + /* + * RULE: Each node considers only the next-hop + * + * We're going to route the message, so change the peer to + * the router. + */ + LASSERT(best_gw->lpni_peer_net); + LASSERT(best_gw->lpni_peer_net->lpn_peer); + peer = best_gw->lpni_peer_net->lpn_peer; } else if (!lnet_is_peer_net_healthy_locked(peer_net)) { /* * this peer_net is unhealthy but we still have an opportunity @@ -1638,6 +1718,7 @@ pick_peer: lpni = NULL; best_lpni_credits = INT_MIN; preferred = false; + best_lpni = NULL; while ((lpni = lnet_get_next_peer_ni_locked(peer, peer_net, lpni))) { /* * if this peer ni is not healthy just skip it, no point in @@ -1690,6 +1771,8 @@ pick_peer: } send: + routing = routing || routing2; + /* * Increment sequence number of the peer selected so that we * pick the next one in Round Robin. @@ -1697,13 +1780,6 @@ send: best_lpni->lpni_seq++; /* - * When routing the best gateway found acts as the best peer - * NI to send to. - */ - if (routing) - best_lpni = best_gw; - - /* * grab a reference on the peer_ni so it sticks around even if * we need to drop and relock the lnet_net_lock below. */ diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index aa85c5e..a1f6990 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -258,11 +258,18 @@ lnet_try_destroy_peer_hierarchy_locked(struct lnet_peer_ni *lpni) } /* called with lnet_net_lock LNET_LOCK_EX held */ -static void +static int lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni) { struct lnet_peer_table *ptable = NULL; + /* don't remove a peer_ni if it's also a gateway */ + if (lpni->lpni_rtr_refcount > 0) { + CERROR("Peer NI %s is a gateway. Can not delete it\n", + libcfs_nid2str(lpni->lpni_nid)); + return -EBUSY; + } + lnet_peer_remove_from_remote_list(lpni); /* remove peer ni from the hash list. */ @@ -293,6 +300,8 @@ lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni) /* decrement reference on peer */ lnet_peer_ni_decref_locked(lpni); + + return 0; } void lnet_peer_uninit() @@ -311,17 +320,22 @@ void lnet_peer_uninit() lnet_net_unlock(LNET_LOCK_EX); } -static void +static int lnet_peer_del_locked(struct lnet_peer *peer) { struct lnet_peer_ni *lpni = NULL, *lpni2; + int rc = 0, rc2 = 0; lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni); while (lpni != NULL) { lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni); - lnet_peer_ni_del_locked(lpni); + rc = lnet_peer_ni_del_locked(lpni); + if (rc != 0) + rc2 = rc; lpni = lpni2; } + + return rc2; } static void @@ -893,6 +907,7 @@ lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid) lnet_nid_t local_nid; struct lnet_peer *peer; struct lnet_peer_ni *lpni; + int rc; if (key_nid == LNET_NID_ANY) return -EINVAL; @@ -913,17 +928,17 @@ lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid) * entire peer */ lnet_net_lock(LNET_LOCK_EX); - lnet_peer_del_locked(peer); + rc = lnet_peer_del_locked(peer); lnet_net_unlock(LNET_LOCK_EX); - return 0; + return rc; } lnet_net_lock(LNET_LOCK_EX); - lnet_peer_ni_del_locked(lpni); + rc = lnet_peer_ni_del_locked(lpni); lnet_net_unlock(LNET_LOCK_EX); - return 0; + return rc; } void