*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/lib-move.c
*
lnet_nid_t nid;
int healthv;
int rc;
+ time64_t now;
/*
* splice the recovery queue on a local queue. We will iterate
&local_queue);
lnet_net_unlock(0);
+ now = ktime_get_seconds();
+
list_for_each_entry_safe(ni, tmp, &local_queue, ni_recovery) {
/*
* if an NI is being deleted or it is now healthy, there
ni->ni_recovery_state &= ~LNET_NI_RECOVERY_FAILED;
}
+
lnet_ni_unlock(ni);
- lnet_net_unlock(0);
+ if (now < ni->ni_next_ping) {
+ lnet_net_unlock(0);
+ continue;
+ }
+
+ lnet_net_unlock(0);
CDEBUG(D_NET, "attempting to recover local ni: %s\n",
libcfs_nid2str(ni->ni_nid));
LNetMDUnlink(mdh);
continue;
}
- /*
- * Same note as in lnet_recover_peer_nis(). When
- * we're sending the ping, the NI is free to be
- * deleted or manipulated. By this point it
- * could've been added back on the recovery queue,
- * and a refcount taken on it.
- * So we can't just add it blindly again or we'll
- * corrupt the queue. We must check under lock if
- * it's not on any list and if not then add it
- * to the processed list, which will eventually be
- * spliced back on to the recovery queue.
- */
+ ni->ni_ping_count++;
+
ni->ni_ping_mdh = mdh;
- if (list_empty(&ni->ni_recovery)) {
- list_add_tail(&ni->ni_recovery, &processed_list);
- lnet_ni_addref_locked(ni, 0);
- }
- lnet_net_unlock(0);
+ lnet_ni_add_to_recoveryq_locked(ni, &processed_list,
+ now);
- lnet_ni_lock(ni);
- if (rc)
+ if (rc) {
+ lnet_ni_lock(ni);
ni->ni_recovery_state &= ~LNET_NI_RECOVERY_PENDING;
- }
- lnet_ni_unlock(ni);
+ lnet_ni_unlock(ni);
+ }
+ lnet_net_unlock(0);
+ } else
+ lnet_ni_unlock(ni);
}
/*
goto drop;
}
- if (lnet_drop_asym_route && for_me &&
+ /* If this message was forwarded to us from a router then we may need
+ * to update router aliveness or check for an asymmetrical route
+ * (or both)
+ */
+ if (((lnet_drop_asym_route && for_me) ||
+ !lpni->lpni_peer_net->lpn_peer->lp_alive) &&
LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
__u32 src_net_id = LNET_NIDNET(src_nid);
struct lnet_peer *gw = lpni->lpni_peer_net->lpn_peer;
list_for_each_entry(route, &gw->lp_routes, lr_gwlist) {
if (route->lr_net == src_net_id) {
found = true;
- break;
+ /* If we're transitioning the gateway from
+ * dead -> alive, and discovery is disabled
+ * locally or on the gateway, then we need to
+ * update the cached route aliveness for each
+ * route to the src_nid's net.
+ *
+ * Otherwise, we're only checking for a
+ * symmetrical route, and we can break the
+ * loop
+ */
+ if (!gw->lp_alive &&
+ lnet_is_discovery_disabled(gw))
+ lnet_set_route_aliveness(route, true);
+ else
+ break;
}
}
- if (!found) {
+ if (lnet_drop_asym_route && for_me && !found) {
lnet_net_unlock(cpt);
/* we would not use from_nid to route a message to
* src_nid
lnet_msg_free(msg);
goto drop;
}
+ if (!gw->lp_alive) {
+ struct lnet_peer_net *lpn;
+ struct lnet_peer_ni *lpni2;
+
+ gw->lp_alive = true;
+ /* Mark all remote NIs on src_nid's net UP */
+ lpn = lnet_peer_get_net_locked(gw, src_net_id);
+ if (lpn)
+ list_for_each_entry(lpni2, &lpn->lpn_peer_nis,
+ lpni_peer_nis)
+ lpni2->lpni_ns_status = LNET_NI_STATUS_UP;
+ }
}
lpni->lpni_last_alive = ktime_get_seconds();