X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fgnilnd%2Fgnilnd_stack.c;h=bdec6856fa75d55da3bcbb399ca7c62a84c1022d;hb=b19b318b5e74e76b3fc5b15c796666d04df62466;hp=43d301a20fa903c7f31721ac1d6aab0596af5ab7;hpb=e8bf4e3eadf1cec9a0c9dca609a0b023fc5a397d;p=fs%2Flustre-release.git

diff --git a/lnet/klnds/gnilnd/gnilnd_stack.c b/lnet/klnds/gnilnd/gnilnd_stack.c
index 43d301a..bdec685 100644
--- a/lnet/klnds/gnilnd/gnilnd_stack.c
+++ b/lnet/klnds/gnilnd/gnilnd_stack.c
@@ -1,6 +1,8 @@
 /*
  * Copyright (C) 2012 Cray, Inc.
  *
+ * Copyright (c) 2014, Intel Corporation.
+ *
  * Author: Nic Henke
  *
  * This file is part of Lustre, http://www.lustre.org.
@@ -34,7 +36,7 @@ kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
 	kgn_device_t   *dev;
 	kgn_dgram_t    *dgram;
 
-	LCONSOLE_INFO("%s: bumping all timeouts by %ds\n", reason, nap_time);
+	CDEBUG(D_INFO, "%s: bumping all timeouts by %ds\n", reason, nap_time);
 
 	LASSERTF(GNILND_IS_QUIESCED, "gnilnd not quiesced %d != %d\n",
 		 atomic_read(&kgnilnd_data.kgn_nquiesce),
@@ -56,6 +58,7 @@ kgnilnd_bump_timeouts(__u32 nap_time, char *reason)
 			peer->gnp_reconnect_interval = 0;
 			/* tell LNet dude is still alive */
 			kgnilnd_peer_alive(peer);
+			kgnilnd_peer_notify(peer, 0, 1);
 
 			list_for_each_entry(tx, &peer->gnp_tx_queue, tx_list) {
 				tx->tx_qtime = jiffies;
@@ -121,28 +124,28 @@ kgnilnd_quiesce_wait(char *reason)
 		quiesce_to = cfs_time_seconds(*kgnilnd_tunables.kgn_timeout * 10);
 		quiesce_deadline = (long) jiffies + quiesce_to;
 
+		LCONSOLE_INFO("Quiesce start: %s\n", reason);
 		/* wait for everyone to check-in as quiesced */
-		i = 1;
 		while (!GNILND_IS_QUIESCED) {
-			i++;
-			LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
+			CDEBUG(D_INFO,
 				"%s: Waiting for %d threads to pause\n",
 				reason,
 				atomic_read(&kgnilnd_data.kgn_nthreads) -
 				atomic_read(&kgnilnd_data.kgn_nquiesce));
 			CFS_RACE(CFS_FAIL_GNI_QUIESCE_RACE);
-			cfs_pause(cfs_time_seconds(1 * i));
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(cfs_time_seconds(1 * i));
 
 			LASSERTF(quiesce_deadline > jiffies,
 				"couldn't quiesce threads in %lu seconds, falling over now\n",
 				cfs_duration_sec(quiesce_to));
 		}
 
-		LCONSOLE_WARN("%s: All threads paused!\n", reason);
+		CDEBUG(D_INFO, "%s: All threads paused!\n", reason);
 		/* XXX Nic: Is there a set of counters we can grab here to
 		 * ensure that there is no traffic until quiesce is over ?*/
 	} else {
-		/* GO! GO! GO! */
+		LCONSOLE_INFO("Quiesce complete: %s\n", reason);
 
 		for (i = 0; i < kgnilnd_data.kgn_ndevs; i++) {
 			kgn_device_t *dev = &kgnilnd_data.kgn_devices[i];
@@ -151,17 +154,16 @@ kgnilnd_quiesce_wait(char *reason)
 
 		/* wait for everyone to check-in as running - they will be spinning
 		 * and looking, so no need to poke any waitq */
-		i = 1;
 		while (atomic_read(&kgnilnd_data.kgn_nquiesce) > 0) {
-			i++;
-			LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
+			CDEBUG(D_INFO,
 				"%s: Waiting for %d threads to wake up\n",
 				reason,
 				atomic_read(&kgnilnd_data.kgn_nquiesce));
 
-			cfs_pause(cfs_time_seconds(1 * i));
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(cfs_time_seconds(1 * i));
 		}
 
-		LCONSOLE_WARN("%s: All threads awake!\n", reason);
+		CDEBUG(D_INFO, "%s: All threads awake!\n", reason);
 	}
 }
@@ -252,6 +254,9 @@ kgnilnd_reset_stack(void)
 
 		list_del_init(&conn->gnc_schedlist);
 
+		if (!list_empty(&conn->gnc_delaylist))
+			list_del_init(&conn->gnc_delaylist);
+
 		if (conn->gnc_state == GNILND_CONN_CLOSING) {
 			/* bump to CLOSED to fake out send of CLOSE */
 			conn->gnc_state = GNILND_CONN_CLOSED;
@@ -387,7 +392,7 @@ kgnilnd_ruhroh_thread(void *arg)
 			break;
 
 		/* Serialize with driver startup and shutdown. */
-		down(&kgnilnd_data.kgn_quiesce_sem);
+		mutex_lock(&kgnilnd_data.kgn_quiesce_mutex);
 
 		CDEBUG(D_NET, "trigger %d reset %d to_bump %d pause %d\n",
 			kgnilnd_data.kgn_quiesce_trigger,
@@ -400,7 +405,7 @@ kgnilnd_ruhroh_thread(void *arg)
 
 			/* Pause all other kgnilnd threads. */
 			set_mb(kgnilnd_data.kgn_quiesce_trigger, GNILND_QUIESCE_HW_QUIESCE);
-			kgnilnd_quiesce_wait("hardware quiesce flag");
+			kgnilnd_quiesce_wait("hardware quiesce");
 
 			/* If the hardware quiesce flag is set, wait for it to clear.
 			 * This should happen relatively quickly, so we wait for it.
@@ -415,9 +420,10 @@ kgnilnd_ruhroh_thread(void *arg)
 			while (kgnilnd_hw_in_quiesce() || kgnilnd_data.kgn_bump_info_rdy) {
 				i++;
-				LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
-					"Waiting for hardware quiesce flag to clear\n");
-				cfs_pause(cfs_time_seconds(1 * i));
+				CDEBUG(D_INFO, "Waiting for hardware quiesce "
+					"flag to clear\n");
+				set_current_state(TASK_UNINTERRUPTIBLE);
+				schedule_timeout(cfs_time_seconds(1 * i));
 
 				/* If we got a quiesce event with bump info, DO THE BUMP!. */
 				if (kgnilnd_data.kgn_bump_info_rdy) {
@@ -452,7 +458,7 @@ kgnilnd_ruhroh_thread(void *arg)
 			set_mb(kgnilnd_data.kgn_needs_reset, 0);
 		}
 
-		up(&kgnilnd_data.kgn_quiesce_sem);
+		mutex_unlock(&kgnilnd_data.kgn_quiesce_mutex);
 	}
 
 	kgnilnd_data.kgn_ruhroh_running = 0;
@@ -532,7 +538,7 @@ kgnilnd_quiesce_end_callback(gni_nic_handle_t nic_handle, uint64_t msecs)
 
 	if (!kgnilnd_data.kgn_ruhroh_shutdown) {
 
-		CDEBUG(D_NET, "requesting timeout bump by "LPD64" msecs\n", msecs);
+		CDEBUG(D_NET, "requesting timeout bump by %lld msecs\n", msecs);
 
 		/* Save the bump interval and request the bump.
 		 * The memory barrier ensures that the interval is in place before
@@ -578,10 +584,9 @@ struct rcadata {
 	rs_event_code_t ec;
 };
 static struct rcadata rd[RCA_EVENTS] = {
-	{0, 0, ec_node_unavailable},
-	{0, 0, ec_node_available},
-	{0, 0, ec_node_failed}
-};
+	{ .ec = ec_node_unavailable },
+	{ .ec = ec_node_available },
+	{ .ec = ec_node_failed } };
 
 /* thread for receiving rca events */
 int
@@ -648,7 +653,7 @@ subscribe_retry:
 		}
 
 		if (krca_get_message(&rca_krt, &event) == 0) {
-			int node_down = GNILND_RCA_NODE_UNKNOWN;
+			int node_down = GNILND_PEER_UNKNOWN;
 			rs_state_t state;
 			LIST_HEAD(zombies);
 
@@ -662,15 +667,17 @@ subscribe_retry:
 		}
 
 		/* Only care about compute and service nodes not GPUs */
-		if (RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
-				TYPE) != rt_node) {
-			continue;
+		if (!(RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+				  TYPE) == rt_node ||
+		      RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
+				  TYPE) == rt_accel)) {
+			continue;
 		}
 
 		switch (event.ev_id) {
 		case ec_node_available:
 			CDEBUG(D_INFO, "ec_node_available\n");
-			node_down = GNILND_RCA_NODE_UP;
+			node_down = GNILND_PEER_UP;
 			break;
 		case ec_node_failed:
 			CDEBUG(D_INFO, "ec_node_failed\n");
@@ -679,7 +686,7 @@ subscribe_retry:
 					"ec_node_failed ignored\n");
 				break;
 			}
-			node_down = GNILND_RCA_NODE_DOWN;
+			node_down = GNILND_PEER_DOWN;
 			break;
 		case ec_node_unavailable:
 			state = RSN_GET_FLD(event.ev_gen.svid_node.rsn_intval, STATE);
@@ -696,7 +703,7 @@ subscribe_retry:
 					" RS_CS_READY state\n");
 				break;
 			}
-			node_down = GNILND_RCA_NODE_DOWN;
+			node_down = GNILND_PEER_DOWN;
 			break;
 		default:
 			CDEBUG(D_INFO, "unknown event\n");
@@ -705,9 +712,8 @@ subscribe_retry:
 
 		/* if we get an event we don't know about, just go ahead
 		 * and wait for another event */
-		if (node_down == GNILND_RCA_NODE_UNKNOWN) {
+		if (node_down == GNILND_PEER_UNKNOWN)
 			continue;
-		}
 
 		nid = RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
 				  NID);
@@ -763,7 +769,7 @@ int
 kgnilnd_get_node_state(__u32 nid)
 {
 	int i;
-	int rc = GNILND_RCA_NODE_UNKNOWN;
+	int rc = GNILND_PEER_UNKNOWN;
 	int ret;
 	rs_node_array_t nlist;
 	rs_node_t *na = NULL;
@@ -778,7 +784,7 @@ kgnilnd_get_node_state(__u32 nid)
 	for (i = 0; i < nlist.na_len; i++) {
 		if ((rca_nid_t)RSN_GET_FLD(na[i].rs_node_flat, NID) == nid) {
 			rc = RSN_GET_FLD(na[i].rs_node_flat, STATE) == RS_CS_READY ?
-				GNILND_RCA_NODE_UP : GNILND_RCA_NODE_DOWN;
+				GNILND_PEER_UP : GNILND_PEER_DOWN;
 			break;
 		}
 	}
@@ -805,6 +811,6 @@ kgnilnd_wakeup_rca_thread(void)
 int
 kgnilnd_get_node_state(__u32 nid)
 {
-	return GNILND_RCA_NODE_UP;
+	return GNILND_PEER_UP;
 }
 #endif /* GNILND_USE_RCA */