Whamcloud - gitweb
LU-5396: o2ib: make local functions static
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd_stack.c
index 10ae493..a502ce2 100644 (file)
@@ -20,7 +20,9 @@
  *
  */
 #include "gnilnd.h"
-
+#if defined(GNILND_USE_RCA)
+#include <rsms/rs_sm_states.h>
+#endif
 /* Advance all timeouts by nap_time seconds. */
 void
 kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
@@ -73,6 +75,7 @@ kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
                                 * we'll back it up and schedule the conn to trigger
                                 * a NOOP */
                                conn->gnc_last_tx = jiffies - timeout;
+                               if (conn->gnc_state != GNILND_CONN_DONE)
                                kgnilnd_schedule_conn(conn);
                        }
                }
@@ -112,6 +115,8 @@ kgnilnd_quiesce_wait(char *reason)
                        wake_up_all(&dev->gnd_dgping_waitq);
                }
 
+               kgnilnd_wakeup_rca_thread();
+
                /* we'll wait for 10x the timeout for the threads to pause */
                quiesce_to = cfs_time_seconds(*kgnilnd_tunables.kgn_timeout * 10);
                quiesce_deadline = (long) jiffies + quiesce_to;
@@ -294,7 +299,7 @@ kgnilnd_reset_stack(void)
                /* now all the cons/mboxes should be cleaned up, including purgatory
                 * so go through and release the MDDs for our persistent PHYS fma_blks
                 */
-               kgnilnd_unmap_phys_fmablk(dev);
+               kgnilnd_unmap_fma_blocks(dev);
 
                LASSERTF(atomic_read(&dev->gnd_nfmablk) == 0,
                        "reset failed: fma blocks still live %d\n",
@@ -362,7 +367,6 @@ kgnilnd_ruhroh_thread(void *arg)
        int                i = 1;
        DEFINE_WAIT(wait);
 
-       cfs_daemonize("kgnilnd_rr");
        cfs_block_allsigs();
        set_user_nice(current, *kgnilnd_tunables.kgn_nice);
        kgnilnd_data.kgn_ruhroh_running = 1;
@@ -383,7 +387,7 @@ kgnilnd_ruhroh_thread(void *arg)
                        break;
 
                /* Serialize with driver startup and shutdown. */
-               down(&kgnilnd_data.kgn_quiesce_sem);
+               mutex_lock(&kgnilnd_data.kgn_quiesce_mutex);
 
               CDEBUG(D_NET, "trigger %d reset %d to_bump %d pause %d\n",
                        kgnilnd_data.kgn_quiesce_trigger,
@@ -448,7 +452,7 @@ kgnilnd_ruhroh_thread(void *arg)
                        set_mb(kgnilnd_data.kgn_needs_reset, 0);
                }
 
-               up(&kgnilnd_data.kgn_quiesce_sem);
+               mutex_unlock(&kgnilnd_data.kgn_quiesce_mutex);
        }
 
        kgnilnd_data.kgn_ruhroh_running = 0;
@@ -562,3 +566,245 @@ kgnilnd_critical_error(struct gni_err *err_handle)
                CDEBUG(D_NET, "stack reset bypassed because of shutdown\n");
        }
 }
+
+#if defined(GNILND_USE_RCA)
+#include <krca_lib.h>
+/* Number of RCA event codes subscribed to; one rd[] slot per code. */
+#define RCA_EVENTS 3
+
+/* Service ticket handed to krca_register(); kept at file scope because
+ * krca_wakeup_wait_event() needs it too. */
+static krca_ticket_t rca_krt = KRCA_NULL_TICKET;
+
+/* Bookkeeping for a single RCA event subscription. */
+struct rcadata {
+       /* passed by address to krca_subscribe(), later to krca_unsubscribe() */
+       rca_ticket_t ticket;
+       /* set to 1 once krca_subscribe() succeeded, cleared on unsubscribe */
+       int subscribed;
+       /* event code this slot subscribes to */
+       rs_event_code_t ec;
+};
+
+/* Subscription table walked by kgnilnd_rca(); nothing is subscribed yet. */
+static struct rcadata rd[RCA_EVENTS] = {
+       { .ticket = 0, .subscribed = 0, .ec = ec_node_unavailable },
+       { .ticket = 0, .subscribed = 0, .ec = ec_node_available },
+       { .ticket = 0, .subscribed = 0, .ec = ec_node_failed }
+};
+
+/* thread for receiving rca events */
+/*
+ * Thread for receiving RCA events.
+ *
+ * Registers with RCA under RCA_SVCTYPE_GNILND, subscribes to each event
+ * code in rd[] and then loops on krca_wait_event() until
+ * kgnilnd_data.kgn_shutdown is set.  Node events that map to a known
+ * up/down state are passed to kgnilnd_report_node_state().
+ *
+ * On exit, whether from shutdown or from a failed register/subscribe, any
+ * live subscriptions are dropped, krca_unregister() is called and the
+ * thread finishes through kgnilnd_thread_fini().
+ *
+ * arg is unused.  Always returns 0.
+ */
+int
+kgnilnd_rca(void *arg)
+{
+       int        i, rc;
+       int        retry_count;
+       rs_event_t event;
+       lnet_nid_t nid;
+
+       cfs_block_allsigs();
+
+       /* all gnilnd threads need to run fairly urgently */
+       set_user_nice(current, *kgnilnd_tunables.kgn_nice);
+
+       /*
+        * Register our service with RCA and subscribe to events
+        * of interest.
+        */
+       rca_krt = KRCA_NULL_TICKET;
+       rc = krca_register(&rca_krt, RCA_SVCTYPE_GNILND, current->pid, 0);
+       if (rc < 0) {
+               CNETERR("krca_register(%x) returned %d\n", current->pid, rc);
+               goto done;
+       }
+
+       for (i = 0; i < RCA_EVENTS; i++) {
+               retry_count = 0;
+subscribe_retry:
+               rc = krca_subscribe(&rca_krt, rd[i].ec, RCA_RX_SVC_ANY,
+                                   &rd[i].ticket);
+
+               /* an interrupted subscribe is retried exactly once */
+               if ((rc == -EINTR) && !retry_count) {
+                       retry_count++;
+                       CNETERR("krca_subscribe returned %d - retrying\n", rc);
+                       goto subscribe_retry;
+               }
+
+               if (rc < 0) {
+                       CNETERR("rca subscription failed (%d)\n", rc);
+                       goto done;
+               }
+
+               /* remember it so the cleanup below unsubscribes this slot */
+               rd[i].subscribed = 1;
+       }
+
+       while (!kgnilnd_data.kgn_shutdown) {
+               int        node_down = GNILND_RCA_NODE_UNKNOWN;
+               rs_state_t state;
+
+               if (unlikely(kgnilnd_data.kgn_quiesce_trigger)) {
+                       KGNILND_SPIN_QUIESCE;
+               }
+               /* wait here for a subscribed event */
+               rc = krca_wait_event(&rca_krt);
+
+               /* RCA return values:
+                * 0 indicates krca_wakeup_wait_event caused krca_wait_event
+                *   return.
+                * -ERESTARTSYS indicates krca_wait_event returned because of a
+                *   signal.
+                * -ENOSPC indicates no space available to create an rcad_reg_t
+                * 1 indicates a message is waiting.
+                */
+               if (rc <= 0) {
+                       continue;
+               }
+
+               if (krca_get_message(&rca_krt, &event) != 0) {
+                       CNETERR("krca_get_message failed\n");
+                       continue;
+               }
+
+               /* Compute nodes don't care about other compute nodes
+                * so we don't need to create a peer.
+                */
+               if (GNILND_COMPUTE &&
+                   !RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+                                IS_SVC)) {
+                       continue;
+               }
+
+               /* Only care about compute and service nodes not GPUs */
+               if (RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+                               TYPE) != rt_node) {
+                       continue;
+               }
+
+               switch (event.ev_id) {
+               case ec_node_available:
+                       CDEBUG(D_INFO, "ec_node_available\n");
+                       node_down = GNILND_RCA_NODE_UP;
+                       break;
+               case ec_node_failed:
+                       CDEBUG(D_INFO, "ec_node_failed\n");
+                       /* events with ev_len > 0 leave node_down unknown */
+                       if (event.ev_len > 0) {
+                               CDEBUG(D_ERROR,
+                                       "ec_node_failed ignored\n");
+                               break;
+                       }
+                       node_down = GNILND_RCA_NODE_DOWN;
+                       break;
+               case ec_node_unavailable:
+                       state = RSN_GET_FLD(event.ev_gen.svid_node.rsn_intval,
+                                           STATE);
+
+                       CDEBUG(D_INFO, "ec_node_unavailable\n");
+
+                       /*
+                        * Ignore overloaded ec_node_unavailable events
+                        * generated by 'xtcli set_reserve'.
+                        */
+                       if (RS_GET_CS_STATE(state) == RS_CS_READY) {
+                               CDEBUG(D_INFO, "ignoring "
+                                       "ec_node_unavailable event with"
+                                       " RS_CS_READY state\n");
+                               break;
+                       }
+                       node_down = GNILND_RCA_NODE_DOWN;
+                       break;
+               default:
+                       CDEBUG(D_INFO, "unknown event\n");
+                       break;
+               }
+
+               /* if we get an event we don't know about, just go ahead
+                * and wait for another event */
+               if (node_down == GNILND_RCA_NODE_UNKNOWN) {
+                       continue;
+               }
+
+               nid = RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+                                 NID);
+               /* NOTE(review): the "down"/"up" text assumes
+                * GNILND_RCA_NODE_UP is 0 - confirm against gnilnd.h */
+               CDEBUG(D_INFO, "kgnilnd_rca() reporting nid %d %s\n",
+                      (int)nid, node_down ? "down" : "up");
+               kgnilnd_report_node_state(nid, node_down);
+       }
+
+done:
+       CDEBUG(D_INFO, "done\n");
+
+       /* drop only the subscriptions that were actually established */
+       for (i = 0; i < RCA_EVENTS; i++) {
+               if (rd[i].subscribed) {
+                       rc = krca_unsubscribe(&rca_krt, rd[i].ticket);
+
+                       if (rc) {
+                               CNETERR("rca unsubscribe failed (%d)\n", rc);
+                       }
+
+                       rd[i].subscribed = 0;
+               }
+       }
+
+       krca_unregister(&rca_krt);
+       kgnilnd_thread_fini();
+       return 0;
+}
+
+/* Start kgnilnd_rca() as a thread named "kgnilnd_rca" and hand back
+ * whatever kgnilnd_thread_start() returned. */
+int
+kgnilnd_start_rca_thread(void)
+{
+       int rc;
+
+       rc = kgnilnd_thread_start(kgnilnd_rca, NULL, "kgnilnd_rca", 0);
+       return rc;
+}
+
+/* Kick the RCA thread out of krca_wait_event() using the shared rca_krt
+ * ticket; a non-zero result is only logged. */
+void
+kgnilnd_wakeup_rca_thread(void)
+{
+       if (krca_wakeup_wait_event(&rca_krt) != 0) {
+               CDEBUG(D_ERROR, "krca_wakeup_wait_event failed\n");
+       }
+}
+
+/*
+ * Look up the current state of node 'nid' in the node list obtained from
+ * krca_get_sysnodes().
+ *
+ * Returns GNILND_RCA_NODE_UP when the matching entry's STATE field equals
+ * RS_CS_READY, GNILND_RCA_NODE_DOWN for any other state, and
+ * GNILND_RCA_NODE_UNKNOWN when the list could not be fetched or holds no
+ * entry for nid.
+ */
+int
+kgnilnd_get_node_state(__u32 nid)
+{
+       int i;
+       int rc = GNILND_RCA_NODE_UNKNOWN;
+       int ret;
+       rs_node_array_t nlist;
+       rs_node_t       *na = NULL;
+
+       ret = krca_get_sysnodes(&nlist);
+       if (ret < 0) {
+               CDEBUG(D_NETERROR, "krca_get_sysnodes failed %d\n", ret);
+               goto ns_done;
+       }
+
+       na = nlist.na_ids;
+
+       /* stop at the first entry whose NID field matches */
+       for (i = 0; i < nlist.na_len; i++) {
+               if ((rca_nid_t)RSN_GET_FLD(na[i].rs_node_flat, NID) == nid) {
+                       rc = RSN_GET_FLD(na[i].rs_node_flat, STATE) == RS_CS_READY ?
+                               GNILND_RCA_NODE_UP : GNILND_RCA_NODE_DOWN;
+                       break;
+               }
+       }
+
+ns_done:
+       /* na is still NULL if krca_get_sysnodes() failed; kfree(NULL) is a
+        * no-op, otherwise this releases the na_ids array */
+       kfree(na);
+       /* nid is unsigned, so print it with %u rather than %d */
+       CDEBUG(D_NET, "nid %u rc %d (0=up)\n", nid, rc);
+       return rc;
+}
+
+#else /* GNILND_USE_RCA */
+
+/* Built without GNILND_USE_RCA: there is no RCA thread to start, so
+ * report success. */
+int
+kgnilnd_start_rca_thread(void)
+{
+       return 0;
+}
+
+/* Built without GNILND_USE_RCA: no RCA thread exists, nothing to wake. */
+void
+kgnilnd_wakeup_rca_thread(void)
+{
+       return;
+}
+
+/* Built without GNILND_USE_RCA: no node-state source is available, so
+ * every nid is reported as GNILND_RCA_NODE_UP. */
+int
+kgnilnd_get_node_state(__u32 nid)
+{
+       return GNILND_RCA_NODE_UP;
+}
+#endif /* GNILND_USE_RCA */