-
- lnet_net_lock(gateway->lp_cpt);
- return gateway->lp_rcd;
-}
-
-static int
-lnet_router_check_interval (lnet_peer_t *rtr)
-{
- int secs;
-
- secs = rtr->lp_alive ? live_router_check_interval :
- dead_router_check_interval;
- if (secs < 0)
- secs = 0;
-
- return secs;
-}
-
-static void
-lnet_ping_router_locked (lnet_peer_t *rtr)
-{
- lnet_rc_data_t *rcd = NULL;
- cfs_time_t now = cfs_time_current();
- int secs;
-
- lnet_peer_addref_locked(rtr);
-
- if (rtr->lp_ping_deadline != 0 && /* ping timed out? */
- cfs_time_after(now, rtr->lp_ping_deadline))
- lnet_notify_locked(rtr, 1, 0, now);
-
- /* Run any outstanding notifications */
- lnet_ni_notify_locked(rtr->lp_ni, rtr);
-
- if (!lnet_isrouter(rtr) ||
- the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
- /* router table changed or router checker is shutting down */
- lnet_peer_decref_locked(rtr);
- return;
- }
-
- rcd = rtr->lp_rcd != NULL ?
- rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
-
- if (rcd == NULL)
- return;
-
- secs = lnet_router_check_interval(rtr);
-
- CDEBUG(D_NET,
- "rtr %s %d: deadline %lu ping_notsent %d alive %d "
- "alive_count %d lp_ping_timestamp %lu\n",
- libcfs_nid2str(rtr->lp_nid), secs,
- rtr->lp_ping_deadline, rtr->lp_ping_notsent,
- rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
-
- if (secs != 0 && !rtr->lp_ping_notsent &&
- cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp,
- cfs_time_seconds(secs)))) {
- int rc;
- lnet_process_id_t id;
- lnet_handle_md_t mdh;
-
- id.nid = rtr->lp_nid;
- id.pid = LNET_PID_LUSTRE;
- CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
-
- rtr->lp_ping_notsent = 1;
- rtr->lp_ping_timestamp = now;
-
- mdh = rcd->rcd_mdh;
-
- if (rtr->lp_ping_deadline == 0) {
- rtr->lp_ping_deadline =
- cfs_time_shift(router_ping_timeout);
- }
-
- lnet_net_unlock(rtr->lp_cpt);
-
- rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
-
- lnet_net_lock(rtr->lp_cpt);
- if (rc != 0)
- rtr->lp_ping_notsent = 0; /* no event pending */
- }
-
- lnet_peer_decref_locked(rtr);
- return;
-}
-
-int
-lnet_router_checker_start(void)
-{
- int rc;
- int eqsz = 0;
- struct task_struct *task;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- if (check_routers_before_use &&
- dead_router_check_interval <= 0) {
- LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be"
- " set if 'check_routers_before_use' is set"
- "\n");
- return -EINVAL;
- }
-
- sema_init(&the_lnet.ln_rc_signal, 0);
-
- rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh);
- if (rc != 0) {
- CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
- return -ENOMEM;
- }
-
- the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
- task = kthread_run(lnet_router_checker, NULL, "router_checker");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("Can't start router checker thread: %d\n", rc);
- /* block until event callback signals exit */
- down(&the_lnet.ln_rc_signal);
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT(rc == 0);
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
- return -ENOMEM;
- }
-
- if (check_routers_before_use) {
- /* Note that a helpful side-effect of pinging all known routers
- * at startup is that it makes them drop stale connections they
- * may have to a previous instance of me. */
- lnet_wait_known_routerstate();
- }
-
- return 0;
-}
-
-void
-lnet_router_checker_stop (void)
-{
- int rc;
-
- if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
- return;
-
- LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
- the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;
- /* wakeup the RC thread if it's sleeping */
- wake_up(&the_lnet.ln_rc_waitq);
-
- /* block until event callback signals exit */
- down(&the_lnet.ln_rc_signal);
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT (rc == 0);
- return;
-}
-
-static void
-lnet_prune_rc_data(int wait_unlink)
-{
- lnet_rc_data_t *rcd;
- lnet_rc_data_t *tmp;
- lnet_peer_t *lp;
- struct list_head head;
- int i = 2;
-
- if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
- list_empty(&the_lnet.ln_rcd_deathrow) &&
- list_empty(&the_lnet.ln_rcd_zombie)))
- return;
-
- INIT_LIST_HEAD(&head);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
- /* router checker is stopping, prune all */
- list_for_each_entry(lp, &the_lnet.ln_routers,
- lp_rtr_list) {
- if (lp->lp_rcd == NULL)
- continue;
-
- LASSERT(list_empty(&lp->lp_rcd->rcd_list));
- list_add(&lp->lp_rcd->rcd_list,
- &the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
- }
- }
-
- /* unlink all RCDs on deathrow list */
- list_splice_init(&the_lnet.ln_rcd_deathrow, &head);
-
- if (!list_empty(&head)) {
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry(rcd, &head, rcd_list)
- LNetMDUnlink(rcd->rcd_mdh);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- list_splice_init(&head, &the_lnet.ln_rcd_zombie);
-
- /* release all zombie RCDs */
- while (!list_empty(&the_lnet.ln_rcd_zombie)) {
- list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
- rcd_list) {
- if (LNetHandleIsInvalid(rcd->rcd_mdh))
- list_move(&rcd->rcd_list, &head);
- }
-
- wait_unlink = wait_unlink &&
- !list_empty(&the_lnet.ln_rcd_zombie);
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- while (!list_empty(&head)) {
- rcd = list_entry(head.next,
- lnet_rc_data_t, rcd_list);
- list_del_init(&rcd->rcd_list);
- lnet_destroy_rc_data(rcd);
- }
-
- if (!wait_unlink)
- return;
-
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
- "Waiting for rc buffers to unlink\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(cfs_time_seconds(1) / 4);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);