From 230ce576f0735e2d4d14b86bb4b8f11f6f7394ff Mon Sep 17 00:00:00 2001
From: isaac
Date: Mon, 7 Dec 2009 19:31:38 +0000
Subject: [PATCH] i=maxim,i=chris,b=21459: - should update lp_alive for non-router peers.

---
 lnet/ChangeLog                |  4 ++++
 lnet/include/lnet/lib-types.h |  2 ++
 lnet/lnet/lib-move.c          | 32 +++++++++++++++-----------------
 lnet/lnet/peer.c              |  2 +-
 lnet/lnet/router_proc.c       |  2 +-
 5 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/lnet/ChangeLog b/lnet/ChangeLog
index 7fd91a5..d5e212d 100644
--- a/lnet/ChangeLog
+++ b/lnet/ChangeLog
@@ -12,6 +12,10 @@ tbd Sun Microsystems, Inc.
        mxlnd - MX 1.2.1 or later,
        ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
 
+Severity : minor
+Bugzilla : 21459
+Description: should update lp_alive for non-router peers.
+
 Severity : enhancement
 Bugzilla : 15332
 Description: LNet router shuffler.
diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h
index e321e25..f34ffb8 100644
--- a/lnet/include/lnet/lib-types.h
+++ b/lnet/include/lnet/lib-types.h
@@ -453,6 +453,8 @@ typedef struct lnet_peer {
         lnet_rc_data_t *lp_rcd; /* router checker state */
 } lnet_peer_t;
 
+#define lnet_peer_aliveness_enabled(lp) ((lp)->lp_ni->ni_peertimeout > 0)
+
 typedef struct {
         struct list_head lr_list; /* chain on net */
         lnet_peer_t *lr_gateway; /* router node */
diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c
index e3d6042..8646e3f 100644
--- a/lnet/lnet/lib-move.c
+++ b/lnet/lnet/lib-move.c
@@ -914,8 +914,7 @@ lnet_ni_peer_alive(lnet_peer_t *lp)
         cfs_time_t last_alive = 0;
         lnet_ni_t *ni = lp->lp_ni;
 
-        LASSERT (ni != NULL);
-        LASSERT (ni->ni_peertimeout > 0);
+        LASSERT (lnet_peer_aliveness_enabled(lp));
         LASSERT (ni->ni_lnd->lnd_query != NULL);
 
         LNET_UNLOCK();
@@ -933,12 +932,10 @@ lnet_ni_peer_alive(lnet_peer_t *lp)
 static inline int
 lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
 {
-        lnet_ni_t *ni = lp->lp_ni;
-        cfs_time_t deadline;
-        int alive;
+        int alive;
+        cfs_time_t deadline;
 
-        LASSERT (ni != NULL);
-        LASSERT (ni->ni_peertimeout > 0);
+        LASSERT (lnet_peer_aliveness_enabled(lp));
 
         /* Trust lnet_notify() if it has more recent aliveness news, but
          * ignore the initial assumed death (see lnet_peers_start_down()).
@@ -948,12 +945,15 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
                 return 0;
 
         deadline = cfs_time_add(lp->lp_last_alive,
-                                cfs_time_seconds(ni->ni_peertimeout));
+                                cfs_time_seconds(lp->lp_ni->ni_peertimeout));
         alive = cfs_time_after(deadline, now);
 
-        /* Update obsolete lp_alive */
-        if (alive && !lp->lp_alive && lp->lp_timestamp != 0 &&
-            cfs_time_before(lp->lp_timestamp, lp->lp_last_alive))
+        /* Update obsolete lp_alive except for routers assumed to be dead
+         * initially, because router checker would update aliveness in this
+         * case, and moreover lp_last_alive at peer creation is assumed.
+         */
+        if (alive && !lp->lp_alive &&
+            !(lnet_isrouter(lp) && lp->lp_alive_count == 0))
                 lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
 
         return alive;
@@ -965,12 +965,9 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
 int
 lnet_peer_alive_locked (lnet_peer_t *lp)
 {
-        lnet_ni_t *ni = lp->lp_ni;
-        cfs_time_t now = cfs_time_current();
-
-        LASSERT (ni != NULL);
+        cfs_time_t now = cfs_time_current();
 
-        if (ni->ni_peertimeout <= 0) /* disabled */
+        if (!lnet_peer_aliveness_enabled(lp))
                 return -ENODEV;
 
         if (lnet_peer_is_alive(lp, now))
@@ -991,7 +988,8 @@ lnet_peer_alive_locked (lnet_peer_t *lp)
                                "%d < %d (%d/%d)\n",
                                libcfs_nid2str(lp->lp_nid),
                                (int)now, (int)next_query,
-                               lnet_queryinterval, ni->ni_peertimeout);
+                               lnet_queryinterval,
+                               lp->lp_ni->ni_peertimeout);
                         return 0;
                 }
         }
diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c
index 2e7b9a4..8baf199 100644
--- a/lnet/lnet/peer.c
+++ b/lnet/lnet/peer.c
@@ -253,7 +253,7 @@ lnet_debug_peer(lnet_nid_t nid)
                 return;
         }
 
-        if (lnet_isrouter(lp) || lp->lp_ni->ni_peertimeout > 0)
+        if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
                 aliveness = lp->lp_alive ? "up" : "down";
 
         CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c
index 76d82b3..f04f7ba 100644
--- a/lnet/lnet/router_proc.c
+++ b/lnet/lnet/router_proc.c
@@ -444,7 +444,7 @@ int LL_PROC_PROTO(proc_lnet_peers)
                         int txqnob = peer->lp_txqnob;
 
                         if (lnet_isrouter(peer) ||
-                            peer->lp_ni->ni_peertimeout > 0)
+                            lnet_peer_aliveness_enabled(peer))
                                 aliveness = peer->lp_alive ? "up" : "down";
 
                         s += snprintf(s, tmpstr + tmpsiz - s,
-- 
1.8.3.1