/*
* Copyright (C) 2012 Cray, Inc.
*
+ * Copyright (c) 2014, Intel Corporation.
+ *
* Author: Nic Henke <nic@cray.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
*/
#include "gnilnd.h"
+#if defined(GNILND_USE_RCA)
#include <rsms/rs_sm_states.h>
-
+#endif
/* Advance all timeouts by nap_time seconds. */
void
kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
kgn_device_t *dev;
kgn_dgram_t *dgram;
- LCONSOLE_INFO("%s: bumping all timeouts by %ds\n", reason, nap_time);
+ CDEBUG(D_INFO, "%s: bumping all timeouts by %ds\n", reason, nap_time);
LASSERTF(GNILND_IS_QUIESCED, "gnilnd not quiesced %d != %d\n",
atomic_read(&kgnilnd_data.kgn_nquiesce),
peer->gnp_reconnect_interval = 0;
/* tell LNet dude is still alive */
kgnilnd_peer_alive(peer);
+ kgnilnd_peer_notify(peer, 0, 1);
list_for_each_entry(tx, &peer->gnp_tx_queue, tx_list) {
tx->tx_qtime = jiffies;
quiesce_to = cfs_time_seconds(*kgnilnd_tunables.kgn_timeout * 10);
quiesce_deadline = (long) jiffies + quiesce_to;
+ LCONSOLE_INFO("Quiesce start: %s\n", reason);
/* wait for everyone to check-in as quiesced */
- i = 1;
while (!GNILND_IS_QUIESCED) {
- i++;
- LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
+ CDEBUG(D_INFO,
"%s: Waiting for %d threads to pause\n",
reason,
atomic_read(&kgnilnd_data.kgn_nthreads) -
atomic_read(&kgnilnd_data.kgn_nquiesce));
CFS_RACE(CFS_FAIL_GNI_QUIESCE_RACE);
- cfs_pause(cfs_time_seconds(1 * i));
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(cfs_time_seconds(1 * i));
LASSERTF(quiesce_deadline > jiffies,
"couldn't quiesce threads in %lu seconds, falling over now\n",
cfs_duration_sec(quiesce_to));
}
- LCONSOLE_WARN("%s: All threads paused!\n", reason);
+ CDEBUG(D_INFO, "%s: All threads paused!\n", reason);
/* XXX Nic: Is there a set of counters we can grab here to
* ensure that there is no traffic until quiesce is over ?*/
} else {
- /* GO! GO! GO! */
+ LCONSOLE_INFO("Quiesce complete: %s\n", reason);
for (i = 0; i < kgnilnd_data.kgn_ndevs; i++) {
kgn_device_t *dev = &kgnilnd_data.kgn_devices[i];
/* wait for everyone to check-in as running - they will be spinning
* and looking, so no need to poke any waitq */
- i = 1;
while (atomic_read(&kgnilnd_data.kgn_nquiesce) > 0) {
- i++;
- LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
+ CDEBUG(D_INFO,
"%s: Waiting for %d threads to wake up\n",
reason,
atomic_read(&kgnilnd_data.kgn_nquiesce));
- cfs_pause(cfs_time_seconds(1 * i));
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(cfs_time_seconds(1 * i));
}
- LCONSOLE_WARN("%s: All threads awake!\n", reason);
+ CDEBUG(D_INFO, "%s: All threads awake!\n", reason);
}
}
list_del_init(&conn->gnc_schedlist);
+ if (!list_empty(&conn->gnc_delaylist))
+ list_del_init(&conn->gnc_delaylist);
+
if (conn->gnc_state == GNILND_CONN_CLOSING) {
/* bump to CLOSED to fake out send of CLOSE */
conn->gnc_state = GNILND_CONN_CLOSED;
/* now all the cons/mboxes should be cleaned up, including purgatory
* so go through and release the MDDs for our persistent PHYS fma_blks
*/
- kgnilnd_unmap_phys_fmablk(dev);
+ kgnilnd_unmap_fma_blocks(dev);
LASSERTF(atomic_read(&dev->gnd_nfmablk) == 0,
"reset failed: fma blocks still live %d\n",
break;
/* Serialize with driver startup and shutdown. */
- down(&kgnilnd_data.kgn_quiesce_sem);
+ mutex_lock(&kgnilnd_data.kgn_quiesce_mutex);
CDEBUG(D_NET, "trigger %d reset %d to_bump %d pause %d\n",
kgnilnd_data.kgn_quiesce_trigger,
/* Pause all other kgnilnd threads. */
set_mb(kgnilnd_data.kgn_quiesce_trigger, GNILND_QUIESCE_HW_QUIESCE);
- kgnilnd_quiesce_wait("hardware quiesce flag");
+ kgnilnd_quiesce_wait("hardware quiesce");
/* If the hardware quiesce flag is set, wait for it to clear.
* This should happen relatively quickly, so we wait for it.
while (kgnilnd_hw_in_quiesce() || kgnilnd_data.kgn_bump_info_rdy) {
i++;
- LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
- "Waiting for hardware quiesce flag to clear\n");
- cfs_pause(cfs_time_seconds(1 * i));
+ CDEBUG(D_INFO, "Waiting for hardware quiesce "
+ "flag to clear\n");
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(cfs_time_seconds(1 * i));
/* If we got a quiesce event with bump info, DO THE BUMP!. */
if (kgnilnd_data.kgn_bump_info_rdy) {
set_mb(kgnilnd_data.kgn_needs_reset, 0);
}
- up(&kgnilnd_data.kgn_quiesce_sem);
+ mutex_unlock(&kgnilnd_data.kgn_quiesce_mutex);
}
kgnilnd_data.kgn_ruhroh_running = 0;
if (!kgnilnd_data.kgn_ruhroh_shutdown) {
- CDEBUG(D_NET, "requesting timeout bump by "LPD64" msecs\n", msecs);
+ CDEBUG(D_NET, "requesting timeout bump by %lld msecs\n", msecs);
/* Save the bump interval and request the bump.
* The memory barrier ensures that the interval is in place before
rs_event_code_t ec;
};
static struct rcadata rd[RCA_EVENTS] = {
- {0, 0, ec_node_unavailable},
- {0, 0, ec_node_available},
- {0, 0, ec_node_failed}
-};
+ { .ec = ec_node_unavailable },
+ { .ec = ec_node_available },
+ { .ec = ec_node_failed } };
/* thread for receiving rca events */
int
}
if (krca_get_message(&rca_krt, &event) == 0) {
- int node_down = GNILND_RCA_NODE_UNKNOWN;
+ int node_down = GNILND_PEER_UNKNOWN;
rs_state_t state;
LIST_HEAD(zombies);
}
/* Only care about compute and service nodes not GPUs */
- if (RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
- TYPE) != rt_node) {
- continue;
+ if (!(RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+ TYPE) == rt_node ||
+ RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+ TYPE) == rt_accel)) {
+ continue;
}
switch (event.ev_id) {
case ec_node_available:
CDEBUG(D_INFO, "ec_node_available\n");
- node_down = GNILND_RCA_NODE_UP;
+ node_down = GNILND_PEER_UP;
break;
case ec_node_failed:
CDEBUG(D_INFO, "ec_node_failed\n");
"ec_node_failed ignored\n");
break;
}
- node_down = GNILND_RCA_NODE_DOWN;
+ node_down = GNILND_PEER_DOWN;
break;
case ec_node_unavailable:
state = RSN_GET_FLD(event.ev_gen.svid_node.rsn_intval, STATE);
" RS_CS_READY state\n");
break;
}
- node_down = GNILND_RCA_NODE_DOWN;
+ node_down = GNILND_PEER_DOWN;
break;
default:
CDEBUG(D_INFO, "unknown event\n");
/* if we get an event we don't know about, just go ahead
* and wait for another event */
- if (node_down == GNILND_RCA_NODE_UNKNOWN) {
+ if (node_down == GNILND_PEER_UNKNOWN)
continue;
- }
nid = RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
NID);
}
}
+int
+kgnilnd_get_node_state(__u32 nid)
+{
+ int i;
+ int rc = GNILND_PEER_UNKNOWN;
+ int ret;
+ rs_node_array_t nlist;
+ rs_node_t *na = NULL;
+
+ if ((ret = krca_get_sysnodes(&nlist)) < 0) {
+ CDEBUG(D_NETERROR, "krca_get_sysnodes failed %d\n", ret);
+ goto ns_done;
+ }
+
+ na = nlist.na_ids;
+
+ for (i = 0; i < nlist.na_len; i++) {
+ if ((rca_nid_t)RSN_GET_FLD(na[i].rs_node_flat, NID) == nid) {
+ rc = RSN_GET_FLD(na[i].rs_node_flat, STATE) == RS_CS_READY ?
+ GNILND_PEER_UP : GNILND_PEER_DOWN;
+ break;
+ }
+ }
+
+ns_done:
+ kfree(na);
+ CDEBUG(D_NET, "nid %d rc %d (0=up)\n", nid, rc);
+ return rc;
+}
+
#else /* GNILND_USE_RCA */
int
{
}
/* Non-RCA build stub: with no RCA event source to consult, always
 * report the node as GNILND_PEER_UP. */
int
kgnilnd_get_node_state(__u32 nid)
{
	return GNILND_PEER_UP;
}
#endif /* GNILND_USE_RCA */