+ rcd->rcd_pinginfo = pi;
+
+ LASSERT (!LNetHandleIsInvalid(the_lnet.ln_rc_eqh));
+ rc = LNetMDBind((lnet_md_t){.start = pi,
+ .user_ptr = rcd,
+ .length = LNET_PINGINFO_SIZE,
+ .threshold = LNET_MD_THRESH_INF,
+ .options = LNET_MD_TRUNCATE,
+ .eq_handle = the_lnet.ln_rc_eqh},
+ LNET_UNLINK,
+ &rcd->rcd_mdh);
+ if (rc < 0) {
+ CERROR("Can't bind MD: %d\n", rc);
+ goto out;
+ }
+ LASSERT(rc == 0);
+
+ LNET_LOCK();
+ /* router table changed or someone has created rcd for this gateway */
+ if (!lnet_isrouter(gateway) || gateway->lp_rcd != NULL) {
+ LNET_UNLOCK();
+ goto out;
+ }
+
+ lnet_peer_addref_locked(gateway);
+ rcd->rcd_gateway = gateway;
+ gateway->lp_rcd = rcd;
+ gateway->lp_ping_notsent = 0;
+
+ return rcd;
+
+ out:
+ if (rcd != NULL) {
+ if (!LNetHandleIsInvalid(rcd->rcd_mdh)) {
+ rc = LNetMDUnlink(rcd->rcd_mdh);
+ LASSERT(rc == 0);
+ }
+ lnet_destroy_rc_data(rcd);
+ }
+
+ LNET_LOCK();
+ return gateway->lp_rcd;
+}
+
+static int
+lnet_router_check_interval (lnet_peer_t *rtr)
+{
+        /* Seconds to wait between pings of this router: pick the
+         * configured live or dead interval based on its current health.
+         * A negative configured value is clamped to 0 (no checking). */
+        int interval = rtr->lp_alive ? live_router_check_interval :
+                                       dead_router_check_interval;
+
+        return (interval < 0) ? 0 : interval;
+}
+
+static void
+lnet_ping_router_locked (lnet_peer_t *rtr)
+{
+        /* Called with LNET_LOCK held.  Time out any overdue ping, flush
+         * pending notifications for this router, and (re)issue a ping if
+         * its check interval has expired.  Temporarily drops LNET_LOCK
+         * around LNetGet(); holds a temporary ref on @rtr throughout so
+         * the peer cannot vanish while the lock is dropped. */
+        lnet_rc_data_t *rcd = NULL;
+        cfs_time_t      now = cfs_time_current();
+        int             secs;
+
+        lnet_peer_addref_locked(rtr);
+
+        if (rtr->lp_ping_deadline != 0 && /* ping timed out? */
+            cfs_time_after(now, rtr->lp_ping_deadline))
+                lnet_notify_locked(rtr, 1, 0, now);
+
+        /* Run any outstanding notifications */
+        lnet_ni_notify_locked(rtr->lp_ni, rtr);
+
+        if (!lnet_isrouter(rtr) ||
+            the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
+                /* router table changed or router checker is shutting down */
+                lnet_peer_decref_locked(rtr);
+                return;
+        }
+
+        /* NB lnet_create_rc_data_locked() drops and retakes LNET_LOCK */
+        rcd = rtr->lp_rcd != NULL ?
+              rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
+
+        if (rcd == NULL) {
+                /* couldn't get ping buffer; drop the ref taken above
+                 * (every other exit path does) to avoid leaking @rtr */
+                lnet_peer_decref_locked(rtr);
+                return;
+        }
+
+        secs = lnet_router_check_interval(rtr);
+
+        CDEBUG(D_NET,
+               "rtr %s %d: deadline %lu ping_notsent %d alive %d "
+               "alive_count %d lp_ping_timestamp %lu\n",
+               libcfs_nid2str(rtr->lp_nid), secs,
+               rtr->lp_ping_deadline, rtr->lp_ping_notsent,
+               rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
+
+        if (secs != 0 && !rtr->lp_ping_notsent &&
+            cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp,
+                                             cfs_time_seconds(secs)))) {
+                int               rc;
+                lnet_process_id_t id;
+                lnet_handle_md_t  mdh;
+
+                id.nid = rtr->lp_nid;
+                id.pid = LUSTRE_SRV_LNET_PID;
+                CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
+
+                rtr->lp_ping_notsent   = 1;
+                rtr->lp_ping_timestamp = now;
+
+                /* snapshot the MD handle before dropping the lock */
+                mdh = rcd->rcd_mdh;
+
+                if (rtr->lp_ping_deadline == 0) {
+                        rtr->lp_ping_deadline = \
+                           cfs_time_shift(router_ping_timeout);
+                }
+
+                LNET_UNLOCK();
+
+                rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
+                             LNET_PROTO_PING_MATCHBITS, 0);
+
+                LNET_LOCK();
+                if (rc != 0)
+                        rtr->lp_ping_notsent = 0; /* no event pending */
+        }
+
+        lnet_peer_decref_locked(rtr);
+        return;
+}
+
+int
+lnet_router_checker_start(void)
+{
+ int rc;
+ int eqsz;
+#ifndef __KERNEL__
+ lnet_peer_t *rtr;
+ __u64 version;
+ int nrtr = 0;
+ int router_checker_max_eqsize = 10240;
+
+ LASSERT (check_routers_before_use);
+ LASSERT (dead_router_check_interval > 0);
+
+ LNET_LOCK();
+
+ /* As an approximation, allow each router the same number of
+ * outstanding events as it is allowed outstanding sends */
+ eqsz = 0;
+ version = the_lnet.ln_routers_version;
+ cfs_list_for_each_entry(rtr, &the_lnet.ln_routers, lp_rtr_list) {
+ lnet_ni_t *ni = rtr->lp_ni;
+ lnet_process_id_t id;
+
+ nrtr++;
+ eqsz += ni->ni_peertxcredits;
+
+ /* one async ping reply per router */
+ id.nid = rtr->lp_nid;
+ id.pid = LUSTRE_SRV_LNET_PID;
+
+ LNET_UNLOCK();
+
+ rc = LNetSetAsync(id, 1);
+ if (rc != 0) {
+ CWARN("LNetSetAsync %s failed: %d\n",
+ libcfs_id2str(id), rc);
+ return rc;
+ }
+
+ LNET_LOCK();
+ /* NB router list doesn't change in userspace */
+ LASSERT (version == the_lnet.ln_routers_version);
+ }
+
+ LNET_UNLOCK();
+
+ if (nrtr == 0) {
+ CDEBUG(D_NET,
+ "No router found, not starting router checker\n");
+ return 0;
+ }
+
+ /* at least allow a SENT and a REPLY per router */
+ if (router_checker_max_eqsize < 2 * nrtr)
+ router_checker_max_eqsize = 2 * nrtr;
+
+ LASSERT (eqsz > 0);
+ if (eqsz > router_checker_max_eqsize)
+ eqsz = router_checker_max_eqsize;
+#endif
+
+ LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
+
+ if (check_routers_before_use &&
+ dead_router_check_interval <= 0) {
+ LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be"
+ " set if 'check_routers_before_use' is set"
+ "\n");
+ return -EINVAL;
+ }
+
+ if (!the_lnet.ln_routing &&
+ live_router_check_interval <= 0 &&
+ dead_router_check_interval <= 0)
+ return 0;
+
+#ifdef __KERNEL__
+ cfs_sema_init(&the_lnet.ln_rc_signal, 0);
+ /* EQ size doesn't matter; the callback is guaranteed to get every
+ * event */
+ eqsz = 0;
+ rc = LNetEQAlloc(eqsz, lnet_router_checker_event,
+ &the_lnet.ln_rc_eqh);
+#else
+ rc = LNetEQAlloc(eqsz, LNET_EQ_HANDLER_NONE,
+ &the_lnet.ln_rc_eqh);
+#endif
+ if (rc != 0) {
+ CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
+ return -ENOMEM;
+ }
+
+ the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
+#ifdef __KERNEL__
+ rc = cfs_create_thread(lnet_router_checker, NULL, 0);
+ if (rc < 0) {
+ CERROR("Can't start router checker thread: %d\n", rc);
+ /* block until event callback signals exit */
+ cfs_down(&the_lnet.ln_rc_signal);
+ rc = LNetEQFree(the_lnet.ln_rc_eqh);
+ LASSERT (rc == 0);
+ the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
+ return -ENOMEM;
+ }
+#endif
+
+ if (check_routers_before_use) {
+ /* Note that a helpful side-effect of pinging all known routers
+ * at startup is that it makes them drop stale connections they
+ * may have to a previous instance of me. */
+ lnet_wait_known_routerstate();
+ }
+