From: isaac Date: Mon, 7 Dec 2009 14:39:47 +0000 (+0000) Subject: i=maxim,i=chris,b=21459: X-Git-Tag: GIT_EPOCH_B_HD_KDMU~12 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=1a6613ee0bd6628dad994da64d73e313008f696b i=maxim,i=chris,b=21459: - should update lp_alive for non-router peers. --- diff --git a/lnet/ChangeLog b/lnet/ChangeLog index 0bc3834..eed4732 100644 --- a/lnet/ChangeLog +++ b/lnet/ChangeLog @@ -17,6 +17,10 @@ Bugzilla : Description: Details : +Severity : minor +Bugzilla : 21459 +Description: should update lp_alive for non-router peers + Severity : enhancement Bugzilla : 15332 Description: LNet router shuffler. diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 4ae355c..3eec677 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -451,6 +451,8 @@ typedef struct lnet_peer { lnet_rc_data_t *lp_rcd; /* router checker state */ } lnet_peer_t; +#define lnet_peer_aliveness_enabled(lp) ((lp)->lp_ni->ni_peertimeout > 0) + typedef struct { struct list_head lr_list; /* chain on net */ lnet_peer_t *lr_gateway; /* router node */ diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 156a09a..40195ce 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -915,8 +915,7 @@ lnet_ni_peer_alive(lnet_peer_t *lp) cfs_time_t last_alive = 0; lnet_ni_t *ni = lp->lp_ni; - LASSERT (ni != NULL); - LASSERT (ni->ni_peertimeout > 0); + LASSERT (lnet_peer_aliveness_enabled(lp)); LASSERT (ni->ni_lnd->lnd_query != NULL); LNET_UNLOCK(); @@ -934,12 +933,10 @@ lnet_ni_peer_alive(lnet_peer_t *lp) static inline int lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) { - lnet_ni_t *ni = lp->lp_ni; - cfs_time_t deadline; - int alive; + int alive; + cfs_time_t deadline; - LASSERT (ni != NULL); - LASSERT (ni->ni_peertimeout > 0); + LASSERT (lnet_peer_aliveness_enabled(lp)); /* Trust lnet_notify() if it has more recent aliveness news, but * ignore the initial assumed death (see lnet_peers_start_down()). @@ -949,12 +946,15 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) return 0; deadline = cfs_time_add(lp->lp_last_alive, - cfs_time_seconds(ni->ni_peertimeout)); + cfs_time_seconds(lp->lp_ni->ni_peertimeout)); alive = cfs_time_after(deadline, now); - /* Update obsolete lp_alive */ - if (alive && !lp->lp_alive && lp->lp_timestamp != 0 && - cfs_time_before(lp->lp_timestamp, lp->lp_last_alive)) + /* Update obsolete lp_alive except for routers assumed to be dead + * initially, because router checker would update aliveness in this + * case, and moreover lp_last_alive at peer creation is assumed. + */ + if (alive && !lp->lp_alive && + !(lnet_isrouter(lp) && lp->lp_alive_count == 0)) lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); return alive; @@ -966,12 +966,9 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) int lnet_peer_alive_locked (lnet_peer_t *lp) { - lnet_ni_t *ni = lp->lp_ni; - cfs_time_t now = cfs_time_current(); - - LASSERT (ni != NULL); + cfs_time_t now = cfs_time_current(); - if (ni->ni_peertimeout <= 0) /* disabled */ + if (!lnet_peer_aliveness_enabled(lp)) return -ENODEV; if (lnet_peer_is_alive(lp, now)) @@ -992,7 +989,8 @@ lnet_peer_alive_locked (lnet_peer_t *lp) "%d < %d (%d/%d)\n", libcfs_nid2str(lp->lp_nid), (int)now, (int)next_query, - lnet_queryinterval, ni->ni_peertimeout); + lnet_queryinterval, + lp->lp_ni->ni_peertimeout); return 0; } } diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 2e7b9a4..8baf199 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -253,7 +253,7 @@ lnet_debug_peer(lnet_nid_t nid) return; } - if (lnet_isrouter(lp) || lp->lp_ni->ni_peertimeout > 0) + if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp)) aliveness = lp->lp_alive ? "up" : "down"; CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n", diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index 76d82b3..f04f7ba 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -444,7 +444,7 @@ int LL_PROC_PROTO(proc_lnet_peers) int txqnob = peer->lp_txqnob; if (lnet_isrouter(peer) || - peer->lp_ni->ni_peertimeout > 0) + lnet_peer_aliveness_enabled(peer)) aliveness = peer->lp_alive ? "up" : "down"; s += snprintf(s, tmpstr + tmpsiz - s,