Whamcloud - gitweb
LU-2133 lnet: wrong peer state reported
author: Isaac Huang <he.huang@intel.com>
Tue, 19 Mar 2013 19:20:53 +0000 (13:20 -0600)
committer: Oleg Drokin <oleg.drokin@intel.com>
Sat, 13 Apr 2013 01:31:56 +0000 (21:31 -0400)
When peer health support is disabled, peer state as shown in
/proc/sys/lnet/peers and by IOC_LIBCFS_DEBUG_PEER should be "NA".
Otherwise wrong states could be shown because the peer aliveness time
stamps are not refreshed when peer health is disabled.

Signed-off-by: Isaac Huang <he.huang@intel.com>
Change-Id: Ice5c6651ca5d2620495a0c37de9a22aebd644d0a
Reviewed-on: http://review.whamcloud.com/5955
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/include/lnet/lib-types.h
lnet/lnet/lib-move.c

index 73b9580..2233638 100644 (file)
@@ -518,7 +518,10 @@ struct lnet_peer_table {
        cfs_list_t              *pt_hash;       /* NID->peer hash */
 };
 
-#define lnet_peer_aliveness_enabled(lp) ((lp)->lp_ni->ni_peertimeout > 0)
+/* peer aliveness is enabled only on routers for peers in a network where the
+ * lnet_ni_t::ni_peertimeout has been set to a positive value */
+#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
+                                        (lp)->lp_ni->ni_peertimeout > 0)
 
 typedef struct {
        cfs_list_t              lr_list;        /* chain on net */
index be082c1..41a701c 100644 (file)
@@ -723,11 +723,10 @@ lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg)
 void
 lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp)
 {
-       cfs_time_t      last_alive = 0;
+       cfs_time_t last_alive = 0;
 
        LASSERT(lnet_peer_aliveness_enabled(lp));
        LASSERT(ni->ni_lnd->lnd_query != NULL);
-       LASSERT(the_lnet.ln_routing == 1);
 
        lnet_net_unlock(lp->lp_cpt);
        (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
@@ -747,7 +746,6 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
         cfs_time_t deadline;
 
         LASSERT (lnet_peer_aliveness_enabled(lp));
-        LASSERT (the_lnet.ln_routing == 1);
 
         /* Trust lnet_notify() if it has more recent aliveness news, but
          * ignore the initial assumed death (see lnet_peers_start_down()).
@@ -779,10 +777,6 @@ lnet_peer_alive_locked (lnet_peer_t *lp)
 {
         cfs_time_t now = cfs_time_current();
 
-        /* LU-630: only router checks peer health. */
-        if (the_lnet.ln_routing == 0)
-                return 1;
-
         if (!lnet_peer_aliveness_enabled(lp))
                 return -ENODEV;