*
*/
#include "gnilnd.h"
-
+#if defined(GNILND_USE_RCA)
+#include <rsms/rs_sm_states.h>
+#endif
/* Advance all timeouts by nap_time seconds. */
void
kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
* we'll back it up and schedule the conn to trigger
* a NOOP */
conn->gnc_last_tx = jiffies - timeout;
+ if (conn->gnc_state != GNILND_CONN_DONE)
kgnilnd_schedule_conn(conn);
}
}
wake_up_all(&dev->gnd_dgping_waitq);
}
+ kgnilnd_wakeup_rca_thread();
+
/* we'll wait for 10x the timeout for the threads to pause */
quiesce_to = cfs_time_seconds(*kgnilnd_tunables.kgn_timeout * 10);
quiesce_deadline = (long) jiffies + quiesce_to;
/* now all the cons/mboxes should be cleaned up, including purgatory
* so go through and release the MDDs for our persistent PHYS fma_blks
*/
- kgnilnd_unmap_phys_fmablk(dev);
+ kgnilnd_unmap_fma_blocks(dev);
LASSERTF(atomic_read(&dev->gnd_nfmablk) == 0,
"reset failed: fma blocks still live %d\n",
int i = 1;
DEFINE_WAIT(wait);
- cfs_daemonize("kgnilnd_rr");
cfs_block_allsigs();
set_user_nice(current, *kgnilnd_tunables.kgn_nice);
kgnilnd_data.kgn_ruhroh_running = 1;
break;
/* Serialize with driver startup and shutdown. */
- down(&kgnilnd_data.kgn_quiesce_sem);
+ mutex_lock(&kgnilnd_data.kgn_quiesce_mutex);
CDEBUG(D_NET, "trigger %d reset %d to_bump %d pause %d\n",
kgnilnd_data.kgn_quiesce_trigger,
set_mb(kgnilnd_data.kgn_needs_reset, 0);
}
- up(&kgnilnd_data.kgn_quiesce_sem);
+ mutex_unlock(&kgnilnd_data.kgn_quiesce_mutex);
}
kgnilnd_data.kgn_ruhroh_running = 0;
CDEBUG(D_NET, "stack reset bypassed because of shutdown\n");
}
}
+
+#if defined(GNILND_USE_RCA)
+#include <krca_lib.h>
+/* Number of RCA event codes we subscribe to; must match rd[] below. */
+#define RCA_EVENTS 3
+/* RCA ticket is needed for krca_wakeup_wait_event() */
+static krca_ticket_t rca_krt = KRCA_NULL_TICKET;
+/* Per-subscription bookkeeping: one entry per RCA event code of
+ * interest.  'subscribed' is set once krca_subscribe() succeeds so
+ * the shutdown path only unsubscribes tickets it actually holds. */
+struct rcadata {
+	rca_ticket_t ticket;	/* ticket returned by krca_subscribe() */
+	int subscribed;		/* 1 after a successful subscribe */
+	rs_event_code_t ec;	/* event code this entry covers */
+};
+static struct rcadata rd[RCA_EVENTS] = {
+	{0, 0, ec_node_unavailable},
+	{0, 0, ec_node_available},
+	{0, 0, ec_node_failed}
+};
+
+/* Thread for receiving RCA events: registers with RCA, subscribes to
+ * the node-state event codes in rd[], then loops translating incoming
+ * events into kgnilnd_report_node_state() calls until shutdown.
+ * Returns 0; exits via kgnilnd_thread_fini(). 'arg' is unused. */
+int
+kgnilnd_rca(void *arg)
+{
+	int i, rc;
+	int retry_count;
+	rs_event_t event;
+	lnet_nid_t nid;
+
+	/* Block signals so krca_wait_event() below is only woken by
+	 * krca_wakeup_wait_event() or a pending message. */
+	cfs_block_allsigs();
+
+	/* all gnilnd threads need to run fairly urgently */
+	set_user_nice(current, *kgnilnd_tunables.kgn_nice);
+
+	/*
+	 * Register our service with RCA and subscribe to events
+	 * of interest.
+	 */
+	rca_krt = KRCA_NULL_TICKET;
+	rc = krca_register(&rca_krt, RCA_SVCTYPE_GNILND, current->pid, 0);
+	if (rc < 0) {
+		CNETERR("krca_register(%x) returned %d\n", current->pid, rc);
+		goto done;
+	}
+
+	for (i = 0; i < RCA_EVENTS; i++) {
+		retry_count = 0;
+subscribe_retry:
+		rc = krca_subscribe(&rca_krt, rd[i].ec, RCA_RX_SVC_ANY,
+				    &rd[i].ticket);
+
+		/* A signal-interrupted subscribe is retried exactly once;
+		 * retry_count caps this at a single retry per event. */
+		if ((rc == -EINTR) && !retry_count) {
+			retry_count++;
+			CNETERR("krca_subscribe returned %d - retrying\n", rc);
+			goto subscribe_retry;
+		}
+
+		if (rc < 0) {
+			CNETERR("rca subscription failed (%d)\n", rc);
+			goto done;
+		}
+
+		/* Remember success so the 'done' path unsubscribes it. */
+		rd[i].subscribed = 1;
+	}
+
+	while (!kgnilnd_data.kgn_shutdown) {
+		/* Park here if a stack quiesce/reset is in progress. */
+		if (unlikely(kgnilnd_data.kgn_quiesce_trigger)) {
+			KGNILND_SPIN_QUIESCE;
+		}
+		/* wait here for a subscribed event */
+		rc = krca_wait_event(&rca_krt);
+
+		/* RCA return values:
+		 * 0 indicates krca_wakeup_wait_event caused krca_wait_event
+		 * return.
+		 * -ERESTARTSYS indicates krca_wait_event returned because of a
+		 * signal.
+		 * -ENOSPC indicates no space available to create an rcad_reg_t
+		 * 1 indicates a message is waiting.
+		 */
+		if (rc <= 0) {
+			/* No message pending; re-check shutdown/quiesce. */
+			continue;
+		}
+
+		if (krca_get_message(&rca_krt, &event) == 0) {
+			int node_down = GNILND_RCA_NODE_UNKNOWN;
+			rs_state_t state;
+			/* NOTE(review): 'zombies' appears unused in this
+			 * function - confirm whether it can be removed. */
+			LIST_HEAD(zombies);
+
+			/* Compute nodes don't care about other compute nodes
+			 * so we don't need to create a peer.
+			 */
+			if (GNILND_COMPUTE &&
+			    !RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+					IS_SVC)) {
+				continue;
+			}
+
+			/* Only care about compute and service nodes not GPUs */
+			if (RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+					TYPE) != rt_node) {
+				continue;
+			}
+
+			switch (event.ev_id) {
+			case ec_node_available:
+				CDEBUG(D_INFO, "ec_node_available\n");
+				node_down = GNILND_RCA_NODE_UP;
+				break;
+			case ec_node_failed:
+				CDEBUG(D_INFO, "ec_node_failed\n");
+				/* Events carrying a payload are ignored;
+				 * presumably they describe partial failures -
+				 * TODO confirm against RCA docs. */
+				if (event.ev_len > 0) {
+					CDEBUG(D_ERROR,
+						"ec_node_failed ignored\n");
+					break;
+				}
+				node_down = GNILND_RCA_NODE_DOWN;
+				break;
+			case ec_node_unavailable:
+				state = RSN_GET_FLD(event.ev_gen.svid_node.rsn_intval, STATE);
+
+				CDEBUG(D_INFO, "ec_node_unavailable\n");
+
+				/*
+				 * Ignore overloaded ec_node_unavailable events
+				 * generated by 'xtcli set_reserve'.
+				 */
+				if (RS_GET_CS_STATE(state) == RS_CS_READY) {
+					CDEBUG(D_INFO, "ignoring "
+						"ec_node_unavailable event with"
+						" RS_CS_READY state\n");
+					break;
+				}
+				node_down = GNILND_RCA_NODE_DOWN;
+				break;
+			default:
+				CDEBUG(D_INFO, "unknown event\n");
+				break;
+			}
+
+			/* if we get an event we don't know about, just go ahead
+			 * and wait for another event */
+			if (node_down == GNILND_RCA_NODE_UNKNOWN) {
+				continue;
+			}
+
+			nid = RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+					  NID);
+			/* NOTE(review): the ternary assumes
+			 * GNILND_RCA_NODE_UP == 0 and _DOWN != 0 (matches the
+			 * "(0=up)" note in kgnilnd_get_node_state) - confirm.
+			 * nid is truncated to int for printing only. */
+			CDEBUG(D_INFO,"kgnilnd_rca() reporting nid %d %s\n",
+			       (int)nid, node_down ? "down" : "up");
+			kgnilnd_report_node_state(nid, node_down);
+
+		} else {
+			CNETERR("krca_get_message failed\n");
+		}
+	}
+
+done:
+	CDEBUG(D_INFO, "done\n");
+
+	/* Drop only the subscriptions that actually succeeded. */
+	for (i = 0; i < RCA_EVENTS; i++) {
+		if (rd[i].subscribed) {
+			rc = krca_unsubscribe(&rca_krt, rd[i].ticket);
+
+			if (rc) {
+				CNETERR("rca unsubscribe failed (%d)\n", rc);
+			}
+
+			rd[i].subscribed = 0;
+		}
+	}
+
+	krca_unregister(&rca_krt);
+	kgnilnd_thread_fini();
+	return 0;
+
+}
+
+/* Spawn the RCA event listener thread (kgnilnd_rca).  Returns
+ * whatever kgnilnd_thread_start() returns (0 on success). */
+int
+kgnilnd_start_rca_thread(void)
+{
+	return kgnilnd_thread_start(kgnilnd_rca, NULL, "kgnilnd_rca", 0);
+}
+
+/* Kick kgnilnd_rca() out of krca_wait_event() so it can notice a
+ * pending shutdown or quiesce trigger (called from the quiesce path).
+ * Failure is only logged; there is nothing further to do. */
+void
+kgnilnd_wakeup_rca_thread(void)
+{
+	int ret;
+
+	ret = krca_wakeup_wait_event(&rca_krt);
+
+	if (ret) {
+		CDEBUG(D_ERROR, "krca_wakeup_wait_event failed\n");
+	}
+}
+
+/* Look up 'nid' in RCA's system node list and return
+ * GNILND_RCA_NODE_UP, _DOWN, or _UNKNOWN (when the nid is not found
+ * or krca_get_sysnodes() fails). */
+int
+kgnilnd_get_node_state(__u32 nid)
+{
+	int i;
+	int rc = GNILND_RCA_NODE_UNKNOWN;
+	int ret;
+	rs_node_array_t nlist;
+	rs_node_t *na = NULL;
+
+	if ((ret = krca_get_sysnodes(&nlist)) < 0) {
+		CDEBUG(D_NETERROR, "krca_get_sysnodes failed %d\n", ret);
+		goto ns_done;
+	}
+
+	na = nlist.na_ids;
+
+	for (i = 0; i < nlist.na_len; i++) {
+		if ((rca_nid_t)RSN_GET_FLD(na[i].rs_node_flat, NID) == nid) {
+			/* NOTE(review): this compares the raw STATE field with
+			 * RS_CS_READY, while kgnilnd_rca() applies
+			 * RS_GET_CS_STATE() before the same comparison -
+			 * confirm the raw comparison is intentional here. */
+			rc = RSN_GET_FLD(na[i].rs_node_flat, STATE) == RS_CS_READY ?
+				GNILND_RCA_NODE_UP : GNILND_RCA_NODE_DOWN;
+			break;
+		}
+	}
+
+ns_done:
+	/* Caller owns na_ids; kfree(NULL) is a no-op on the error path.
+	 * Assumes krca_get_sysnodes() kmallocs na_ids - TODO confirm. */
+	kfree(na);
+	CDEBUG(D_NET, "nid %d rc %d (0=up)\n", nid, rc);
+	return rc;
+}
+
+#else /* GNILND_USE_RCA */
+
+/* Stubs for builds without RCA support: no listener thread is started
+ * and every node is reported as up. */
+int
+kgnilnd_start_rca_thread(void)
+{
+	return 0;
+}
+
+void
+kgnilnd_wakeup_rca_thread(void)
+{
+}
+
+int
+kgnilnd_get_node_state(__u32 nid)
+{
+	return GNILND_RCA_NODE_UP;
+}
+#endif /* GNILND_USE_RCA */