Whamcloud - gitweb
LU-2133 lnet: wrong peer state reported
[fs/lustre-release.git] / lnet / lnet / lib-move.c
index e8dc8a0..41a701c 100644 (file)
@@ -26,6 +26,8 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -721,11 +723,10 @@ lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg)
 void
 lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp)
 {
-       cfs_time_t      last_alive = 0;
+       cfs_time_t last_alive = 0;
 
        LASSERT(lnet_peer_aliveness_enabled(lp));
        LASSERT(ni->ni_lnd->lnd_query != NULL);
-       LASSERT(the_lnet.ln_routing == 1);
 
        lnet_net_unlock(lp->lp_cpt);
        (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
@@ -745,7 +746,6 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
         cfs_time_t deadline;
 
         LASSERT (lnet_peer_aliveness_enabled(lp));
-        LASSERT (the_lnet.ln_routing == 1);
 
         /* Trust lnet_notify() if it has more recent aliveness news, but
          * ignore the initial assumed death (see lnet_peers_start_down()).
@@ -777,10 +777,6 @@ lnet_peer_alive_locked (lnet_peer_t *lp)
 {
         cfs_time_t now = cfs_time_current();
 
-        /* LU-630: only router checks peer health. */
-        if (the_lnet.ln_routing == 0)
-                return 1;
-
         if (!lnet_peer_aliveness_enabled(lp))
                 return -ENODEV;
 
@@ -1278,11 +1274,11 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
                LASSERT(src_nid != LNET_NID_ANY);
                lnet_msg_commit(msg, cpt);
 
-                if (!msg->msg_routing)
-                        msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
+               if (!msg->msg_routing)
+                       msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
 
-                if (src_ni == the_lnet.ln_loni) {
-                        /* No send credit hassles with LOLND */
+               if (src_ni == the_lnet.ln_loni) {
+                       /* No send credit hassles with LOLND */
                        lnet_net_unlock(cpt);
                        lnet_ni_send(src_ni, msg);
 
@@ -1416,8 +1412,8 @@ lnet_recv_put(lnet_ni_t *ni, lnet_msg_t *msg)
 
        lnet_build_msg_event(msg, LNET_EVENT_PUT);
 
-        /* Must I ACK?  If so I'll grab the ack_wmd out of the header and put
-         * it back into the ACK during lnet_finalize() */
+       /* Must I ACK?  If so I'll grab the ack_wmd out of the header and put
+        * it back into the ACK during lnet_finalize() */
        msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
                        (msg->msg_md->md_options & LNET_MD_ACK_DISABLE) == 0);
 
@@ -1531,7 +1527,7 @@ lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get)
        rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
        if (rc < 0) {
                /* didn't get as far as lnet_ni_send() */
-                CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
+               CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
                       libcfs_nid2str(ni->ni_nid),
                       libcfs_id2str(info.mi_id), rc);
 
@@ -2021,9 +2017,12 @@ lnet_drop_delayed_msg_list(cfs_list_t *head, char *reason)
                lnet_drop_message(msg->msg_rxpeer->lp_ni,
                                  msg->msg_rxpeer->lp_cpt,
                                  msg->msg_private, msg->msg_len);
-               /* NB: message will not generate event because w/o attached MD,
-                * so we just use 0 as the third parameter */
-               lnet_finalize(msg->msg_rxpeer->lp_ni, msg, 0);
+               /*
+                * NB: the message generates no event because it has no MD
+                * attached, but we must still pass an error code so that
+                * lnet_msg_decommit() can skip counter updates and other checks.
+                */
+               lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
        }
 }
 
@@ -2408,6 +2407,7 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
        int                     hops;
        int                     cpt;
        __u32                   order = 2;
+       cfs_list_t              *rn_list;
 
         /* if !local_nid_dist_zero, I don't return a distance of 0 ever
          * (when lustre sees a distance of 0, it substitutes 0@lo), so I
@@ -2442,13 +2442,14 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
                         if (orderp != NULL)
                                 *orderp = order;
                        lnet_net_unlock(cpt);
-                        return 1;
-                }
+                       return 1;
+               }
 
-                order++;
-        }
+               order++;
+       }
 
-        cfs_list_for_each (e, &the_lnet.ln_remote_nets) {
+       rn_list = lnet_net2rnethash(dstnet);
+       cfs_list_for_each(e, rn_list) {
                 rnet = cfs_list_entry(e, lnet_remotenet_t, lrn_list);
 
                 if (rnet->lrn_net == dstnet) {