Whamcloud - gitweb
LU-12931 gnilnd: use time_after() to compare jiffies
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd_stack.c
index 6cf5f7b..bab50cb 100644 (file)
@@ -1,6 +1,8 @@
 /*
  * Copyright (C) 2012 Cray, Inc.
  *
+ * Copyright (c) 2014, Intel Corporation.
+ *
  *   Author: Nic Henke <nic@cray.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
@@ -20,8 +22,9 @@
  *
  */
 #include "gnilnd.h"
+#if defined(GNILND_USE_RCA)
 #include <rsms/rs_sm_states.h>
-
+#endif
 /* Advance all timeouts by nap_time seconds. */
 void
 kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
@@ -33,7 +36,7 @@ kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
        kgn_device_t           *dev;
        kgn_dgram_t            *dgram;
 
-       LCONSOLE_INFO("%s: bumping all timeouts by %ds\n", reason, nap_time);
+       CDEBUG(D_INFO, "%s: bumping all timeouts by %ds\n", reason, nap_time);
 
        LASSERTF(GNILND_IS_QUIESCED, "gnilnd not quiesced %d != %d\n",
                 atomic_read(&kgnilnd_data.kgn_nquiesce),
@@ -55,6 +58,7 @@ kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
                        peer->gnp_reconnect_interval = 0;
                        /* tell LNet dude is still alive */
                        kgnilnd_peer_alive(peer);
+                       kgnilnd_peer_notify(peer, 0, 1);
 
                        list_for_each_entry(tx, &peer->gnp_tx_queue, tx_list) {
                                tx->tx_qtime = jiffies;
@@ -120,28 +124,27 @@ kgnilnd_quiesce_wait(char *reason)
                quiesce_to = cfs_time_seconds(*kgnilnd_tunables.kgn_timeout * 10);
                quiesce_deadline = (long) jiffies + quiesce_to;
 
+               LCONSOLE_INFO("Quiesce start: %s\n", reason);
                /* wait for everyone to check-in as quiesced */
-               i = 1;
                while (!GNILND_IS_QUIESCED) {
-                       i++;
-                       LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
+                       CDEBUG(D_INFO,
                                 "%s: Waiting for %d threads to pause\n",
                                 reason,
                                 atomic_read(&kgnilnd_data.kgn_nthreads) -
                                 atomic_read(&kgnilnd_data.kgn_nquiesce));
                        CFS_RACE(CFS_FAIL_GNI_QUIESCE_RACE);
-                       cfs_pause(cfs_time_seconds(1 * i));
+                       schedule_timeout_uninterruptible(cfs_time_seconds(i));
 
-                       LASSERTF(quiesce_deadline > jiffies,
+                       LASSERTF(time_after(quiesce_deadline, jiffies),
                                 "couldn't quiesce threads in %lu seconds, falling over now\n",
                                 cfs_duration_sec(quiesce_to));
                }
 
-               LCONSOLE_WARN("%s: All threads paused!\n", reason);
+               CDEBUG(D_INFO, "%s: All threads paused!\n", reason);
                /* XXX Nic: Is there a set of counters we can grab here to
                 * ensure that there is no traffic until quiesce is over ?*/
        } else {
-               /* GO! GO! GO! */
+               LCONSOLE_INFO("Quiesce complete: %s\n", reason);
 
                for (i = 0; i < kgnilnd_data.kgn_ndevs; i++) {
                        kgn_device_t *dev = &kgnilnd_data.kgn_devices[i];
@@ -150,17 +153,15 @@ kgnilnd_quiesce_wait(char *reason)
 
                /* wait for everyone to check-in as running - they will be spinning
                 * and looking, so no need to poke any waitq */
-               i = 1;
                while (atomic_read(&kgnilnd_data.kgn_nquiesce) > 0) {
-                       i++;
-                       LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
+                       CDEBUG(D_INFO,
                                 "%s: Waiting for %d threads to wake up\n",
                                  reason,
                                  atomic_read(&kgnilnd_data.kgn_nquiesce));
-                       cfs_pause(cfs_time_seconds(1 * i));
+                       schedule_timeout_uninterruptible(cfs_time_seconds(i));
                }
 
-               LCONSOLE_WARN("%s: All threads awake!\n", reason);
+               CDEBUG(D_INFO, "%s: All threads awake!\n", reason);
        }
 }
 
@@ -251,6 +252,9 @@ kgnilnd_reset_stack(void)
 
                        list_del_init(&conn->gnc_schedlist);
 
+                       if (!list_empty(&conn->gnc_delaylist))
+                               list_del_init(&conn->gnc_delaylist); 
+
                        if (conn->gnc_state == GNILND_CONN_CLOSING) {
                                /* bump to CLOSED to fake out send of CLOSE */
                                conn->gnc_state = GNILND_CONN_CLOSED;
@@ -298,7 +302,7 @@ kgnilnd_reset_stack(void)
                /* now all the cons/mboxes should be cleaned up, including purgatory
                 * so go through and release the MDDs for our persistent PHYS fma_blks
                 */
-               kgnilnd_unmap_phys_fmablk(dev);
+               kgnilnd_unmap_fma_blocks(dev);
 
                LASSERTF(atomic_read(&dev->gnd_nfmablk) == 0,
                        "reset failed: fma blocks still live %d\n",
@@ -366,7 +370,6 @@ kgnilnd_ruhroh_thread(void *arg)
        int                i = 1;
        DEFINE_WAIT(wait);
 
-       cfs_block_allsigs();
        set_user_nice(current, *kgnilnd_tunables.kgn_nice);
        kgnilnd_data.kgn_ruhroh_running = 1;
 
@@ -386,7 +389,7 @@ kgnilnd_ruhroh_thread(void *arg)
                        break;
 
                /* Serialize with driver startup and shutdown. */
-               down(&kgnilnd_data.kgn_quiesce_sem);
+               mutex_lock(&kgnilnd_data.kgn_quiesce_mutex);
 
               CDEBUG(D_NET, "trigger %d reset %d to_bump %d pause %d\n",
                        kgnilnd_data.kgn_quiesce_trigger,
@@ -399,7 +402,7 @@ kgnilnd_ruhroh_thread(void *arg)
 
                        /* Pause all other kgnilnd threads. */
                        set_mb(kgnilnd_data.kgn_quiesce_trigger, GNILND_QUIESCE_HW_QUIESCE);
-                       kgnilnd_quiesce_wait("hardware quiesce flag");
+                       kgnilnd_quiesce_wait("hardware quiesce");
 
                        /* If the hardware quiesce flag is set, wait for it to clear.
                         * This should happen relatively quickly, so we wait for it.
@@ -414,9 +417,10 @@ kgnilnd_ruhroh_thread(void *arg)
                        while (kgnilnd_hw_in_quiesce() || kgnilnd_data.kgn_bump_info_rdy) {
 
                                i++;
-                               LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
-                                               "Waiting for hardware quiesce flag to clear\n");
-                               cfs_pause(cfs_time_seconds(1 * i));
+                               CDEBUG(D_INFO, "Waiting for hardware quiesce "
+                                              "flag to clear\n");
+                               schedule_timeout_uninterruptible(
+                                       cfs_time_seconds(i));
 
                                /* If we got a quiesce event with bump info, DO THE BUMP! */
                                if (kgnilnd_data.kgn_bump_info_rdy) {
@@ -451,7 +455,7 @@ kgnilnd_ruhroh_thread(void *arg)
                        set_mb(kgnilnd_data.kgn_needs_reset, 0);
                }
 
-               up(&kgnilnd_data.kgn_quiesce_sem);
+               mutex_unlock(&kgnilnd_data.kgn_quiesce_mutex);
        }
 
        kgnilnd_data.kgn_ruhroh_running = 0;
@@ -531,7 +535,7 @@ kgnilnd_quiesce_end_callback(gni_nic_handle_t nic_handle, uint64_t msecs)
 
        if (!kgnilnd_data.kgn_ruhroh_shutdown) {
 
-               CDEBUG(D_NET, "requesting timeout bump by "LPD64" msecs\n", msecs);
+               CDEBUG(D_NET, "requesting timeout bump by %lld msecs\n", msecs);
 
                /* Save the bump interval and request the bump.
                 * The memory barrier ensures that the interval is in place before
@@ -577,10 +581,9 @@ struct rcadata {
        rs_event_code_t ec;
 };
 static struct rcadata rd[RCA_EVENTS] = {
-       {0, 0, ec_node_unavailable},
-       {0, 0, ec_node_available},
-       {0, 0, ec_node_failed}
-};
+       { .ec = ec_node_unavailable },
+       { .ec = ec_node_available },
+       { .ec = ec_node_failed } };
 
 /* thread for receiving rca events */
 int
@@ -591,8 +594,6 @@ kgnilnd_rca(void *arg)
        rs_event_t event;
        lnet_nid_t nid;
 
-       cfs_block_allsigs();
-
        /* all gnilnd threads need to run fairly urgently */
        set_user_nice(current, *kgnilnd_tunables.kgn_nice);
 
@@ -647,7 +648,7 @@ subscribe_retry:
                }
 
                if (krca_get_message(&rca_krt, &event) == 0) {
-                       int node_down = GNILND_RCA_NODE_UNKNOWN;
+                       int node_down = GNILND_PEER_UNKNOWN;
                        rs_state_t state;
                        LIST_HEAD(zombies);
 
@@ -661,15 +662,17 @@ subscribe_retry:
                        }
 
                        /* Only care about compute and service nodes not GPUs */
-                       if (RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
-                                       TYPE) != rt_node) {
-                               continue;
+                       if (!(RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+                                       TYPE) == rt_node ||
+                            RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+                                       TYPE) == rt_accel)) {
+                                               continue;
                        }
 
                        switch (event.ev_id) {
                        case ec_node_available:
                                CDEBUG(D_INFO, "ec_node_available\n");
-                               node_down = GNILND_RCA_NODE_UP;
+                               node_down = GNILND_PEER_UP;
                                break;
                        case ec_node_failed:
                                CDEBUG(D_INFO, "ec_node_failed\n");
@@ -678,7 +681,7 @@ subscribe_retry:
                                                "ec_node_failed ignored\n");
                                        break;
                                }
-                               node_down = GNILND_RCA_NODE_DOWN;
+                               node_down = GNILND_PEER_DOWN;
                                break;
                        case ec_node_unavailable:
                                state = RSN_GET_FLD(event.ev_gen.svid_node.rsn_intval, STATE);
@@ -695,7 +698,7 @@ subscribe_retry:
                                                " RS_CS_READY state\n");
                                        break;
                                }
-                               node_down = GNILND_RCA_NODE_DOWN;
+                               node_down = GNILND_PEER_DOWN;
                                break;
                        default:
                                CDEBUG(D_INFO, "unknown event\n");
@@ -704,9 +707,8 @@ subscribe_retry:
 
                        /* if we get an event we don't know about, just go ahead
                         * and wait for another event */
-                       if (node_down == GNILND_RCA_NODE_UNKNOWN) {
+                       if (node_down == GNILND_PEER_UNKNOWN)
                                continue;
-                       }
 
                        nid = RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
                                          NID);
@@ -758,6 +760,36 @@ kgnilnd_wakeup_rca_thread(void)
        }
 }
 
+/*
+ * Look up the state of node @nid in the node list returned by
+ * krca_get_sysnodes().
+ *
+ * Returns GNILND_PEER_UP when the matching entry's STATE field is
+ * RS_CS_READY, GNILND_PEER_DOWN when the entry is found in any other
+ * state, and GNILND_PEER_UNKNOWN when the node list could not be
+ * fetched or contains no entry for @nid.
+ */
+int
+kgnilnd_get_node_state(__u32 nid)
+{
+       int i;
+       int rc = GNILND_PEER_UNKNOWN;
+       int ret;
+       rs_node_array_t nlist;
+       rs_node_t       *na = NULL;
+
+       ret = krca_get_sysnodes(&nlist);
+       if (ret < 0) {
+               CDEBUG(D_NETERROR, "krca_get_sysnodes failed %d\n", ret);
+               goto ns_done;
+       }
+
+       na = nlist.na_ids;
+
+       for (i = 0; i < nlist.na_len; i++) {
+               /* skip entries that belong to other nodes */
+               if ((rca_nid_t)RSN_GET_FLD(na[i].rs_node_flat, NID) != nid)
+                       continue;
+
+               rc = RSN_GET_FLD(na[i].rs_node_flat, STATE) == RS_CS_READY ?
+                       GNILND_PEER_UP : GNILND_PEER_DOWN;
+               break;
+       }
+
+ns_done:
+       /* na is still NULL when the lookup failed; kfree(NULL) is a no-op */
+       kfree(na);
+       /* nid is unsigned: %u keeps large values from printing as negative */
+       CDEBUG(D_NET, "nid %u rc %d (0=up)\n", nid, rc);
+       return rc;
+}
+
 #else /* GNILND_USE_RCA */
 
 int
@@ -771,4 +803,9 @@ kgnilnd_wakeup_rca_thread(void)
 {
 }
 
+/*
+ * Variant built on the #else side of GNILND_USE_RCA: no node list is
+ * consulted here, so every nid is reported as GNILND_PEER_UP.
+ */
+int
+kgnilnd_get_node_state(__u32 nid)
+{
+       int node_state = GNILND_PEER_UP;
+
+       return node_state;
+}
 #endif /* GNILND_USE_RCA */