From 00243fccc1977e4dee8041f4c0f9854373598dc2 Mon Sep 17 00:00:00 2001 From: Isaac Huang Date: Tue, 19 Mar 2013 13:20:53 -0600 Subject: [PATCH] LU-2133 lnet: wrong peer state reported When peer health support is disabled, peer state as shown in /proc/sys/lnet/peers and by IOC_LIBCFS_DEBUG_PEER should be "NA". Otherwise wrong states could be shown because the peer aliveness time stamps are not refreshed when peer health is disabled. Signed-off-by: Isaac Huang Change-Id: Ice5c6651ca5d2620495a0c37de9a22aebd644d0a Reviewed-on: http://review.whamcloud.com/5955 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Doug Oucharek Reviewed-by: Liang Zhen Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-types.h | 5 ++++- lnet/lnet/lib-move.c | 8 +------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 73b9580..2233638 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -518,7 +518,10 @@ struct lnet_peer_table { cfs_list_t *pt_hash; /* NID->peer hash */ }; -#define lnet_peer_aliveness_enabled(lp) ((lp)->lp_ni->ni_peertimeout > 0) +/* peer aliveness is enabled only on routers for peers in a network where the + * lnet_ni_t::ni_peertimeout has been set to a positive value */ +#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \ + (lp)->lp_ni->ni_peertimeout > 0) typedef struct { cfs_list_t lr_list; /* chain on net */ diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index be082c1..41a701c 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -723,11 +723,10 @@ lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg) void lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp) { - cfs_time_t last_alive = 0; + cfs_time_t last_alive = 0; LASSERT(lnet_peer_aliveness_enabled(lp)); LASSERT(ni->ni_lnd->lnd_query != NULL); - LASSERT(the_lnet.ln_routing == 1); lnet_net_unlock(lp->lp_cpt); (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive); @@ -747,7 +746,6 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) cfs_time_t deadline; LASSERT (lnet_peer_aliveness_enabled(lp)); - LASSERT (the_lnet.ln_routing == 1); /* Trust lnet_notify() if it has more recent aliveness news, but * ignore the initial assumed death (see lnet_peers_start_down()). @@ -779,10 +777,6 @@ lnet_peer_alive_locked (lnet_peer_t *lp) { cfs_time_t now = cfs_time_current(); - /* LU-630: only router checks peer health. */ - if (the_lnet.ln_routing == 0) - return 1; - if (!lnet_peer_aliveness_enabled(lp)) return -ENODEV; -- 1.8.3.1