X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fgnilnd%2Fgnilnd.c;h=14fb5d6125f5a9e2c8b1625451ecb0d8a60d64d9;hp=4b79eeb7a251502913328512472747b042541f5f;hb=5c883ea2748ae9e430a9cd863a9b630b2a74440a;hpb=33d9640be47667b96d31498ccb6824904925d8ee diff --git a/lnet/klnds/gnilnd/gnilnd.c b/lnet/klnds/gnilnd/gnilnd.c index 4b79eeb..14fb5d6 100644 --- a/lnet/klnds/gnilnd/gnilnd.c +++ b/lnet/klnds/gnilnd/gnilnd.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2012 Cray, Inc. * - * Copyright (c) 2013, 2015, Intel Corporation. + * Copyright (c) 2013, 2017, Intel Corporation. * * Author: Nic Henke * Author: James Shimek @@ -25,7 +25,7 @@ #include "gnilnd.h" /* Primary entry points from LNET. There are no guarantees against reentrance. */ -lnd_t the_kgnilnd = { +const struct lnet_lnd the_kgnilnd = { .lnd_type = GNILND, .lnd_startup = kgnilnd_startup, .lnd_shutdown = kgnilnd_shutdown, @@ -33,7 +33,6 @@ lnd_t the_kgnilnd = { .lnd_send = kgnilnd_send, .lnd_recv = kgnilnd_recv, .lnd_eager_recv = kgnilnd_eager_recv, - .lnd_query = kgnilnd_query, }; kgn_data_t kgnilnd_data; @@ -272,7 +271,7 @@ kgnilnd_create_conn(kgn_conn_t **connp, kgn_device_t *dev) conn->gnc_next_tx = (int) GNILND_MAX_MSG_ID - 10; /* if this fails, we have conflicts and MAX_TX is too large */ - CLASSERT(GNILND_MAX_MSG_ID < GNILND_MSGID_CLOSE); + BUILD_BUG_ON(GNILND_MAX_MSG_ID >= GNILND_MSGID_CLOSE); /* get a new unique CQ id for this conn */ write_lock(&kgnilnd_data.kgn_peer_conn_lock); @@ -292,8 +291,8 @@ kgnilnd_create_conn(kgn_conn_t **connp, kgn_device_t *dev) * check context */ conn->gnc_device = dev; - conn->gnc_timeout = MAX(*kgnilnd_tunables.kgn_timeout, - GNILND_MIN_TIMEOUT); + conn->gnc_timeout = max(*kgnilnd_tunables.kgn_timeout, + GNILND_MIN_TIMEOUT); kgnilnd_update_reaper_timeout(conn->gnc_timeout); /* this is the ep_handle for doing SMSG & BTE */ @@ -505,7 +504,9 @@ kgnilnd_destroy_conn(kgn_conn_t *conn) void kgnilnd_peer_alive(kgn_peer_t *peer) { - set_mb(peer->gnp_last_alive, jiffies); + time64_t now = ktime_get_seconds(); + + set_mb(peer->gnp_last_alive, now); } void @@ -601,11 +602,12 @@ kgnilnd_peer_notify(kgn_peer_t *peer, int error, int alive) peer_nid = kgnilnd_lnd2lnetnid(net->gnn_ni->ni_nid, peer->gnp_nid); - CDEBUG(D_NET, "peer 0x%p->%s last_alive %lu (%lus ago)\n", + CDEBUG(D_NET, "peer 0x%p->%s last_alive %lld (%llds ago)\n", peer, libcfs_nid2str(peer_nid), peer->gnp_last_alive, - cfs_duration_sec(jiffies - peer->gnp_last_alive)); + ktime_get_seconds() - peer->gnp_last_alive); lnet_notify(net->gnn_ni, peer_nid, alive, + (alive) ? true : false, peer->gnp_last_alive); kgnilnd_net_decref(net); @@ -877,7 +879,7 @@ kgnilnd_set_conn_params(kgn_dgram_t *dgram) /* set timeout vals in conn early so we can use them for the NAK */ /* use max of the requested and our timeout, peer will do the same */ - conn->gnc_timeout = MAX(conn->gnc_timeout, connreq->gncr_timeout); + conn->gnc_timeout = max(conn->gnc_timeout, connreq->gncr_timeout); /* only ep_bind really mucks around with the CQ */ /* only ep bind if we are not connecting to ourself and the dstnid is not a wildcard. this check @@ -1259,8 +1261,8 @@ kgnilnd_peer_increase_reconnect_locked(kgn_peer_t *peer) current_to += *kgnilnd_tunables.kgn_min_reconnect_interval / 2; } - current_to = MIN(current_to, - *kgnilnd_tunables.kgn_max_reconnect_interval); + current_to = min(current_to, + *kgnilnd_tunables.kgn_max_reconnect_interval); peer->gnp_reconnect_interval = current_to; CDEBUG(D_NET, "peer %s can reconnect at %lu interval %lu\n", @@ -1596,8 +1598,7 @@ kgnilnd_del_conn_or_peer(kgn_net_t *net, lnet_nid_t nid, int command, atomic_read(&kgnilnd_data.kgn_npending_detach) || atomic_read(&kgnilnd_data.kgn_npending_unlink)) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout_uninterruptible(cfs_time_seconds(1)); i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, "Waiting on %d peers %d closes %d detaches\n", @@ -1800,7 +1801,7 @@ kgnilnd_report_node_state(lnet_nid_t nid, int down) } int -kgnilnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) +kgnilnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) { struct libcfs_ioctl_data *data = arg; kgn_net_t *net = ni->ni_data; @@ -1919,77 +1920,6 @@ kgnilnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) return rc; } -void -kgnilnd_query(lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when) -{ - kgn_net_t *net = ni->ni_data; - kgn_tx_t *tx; - kgn_peer_t *peer = NULL; - kgn_conn_t *conn = NULL; - lnet_process_id_t id = { - .nid = nid, - .pid = LNET_PID_LUSTRE, - }; - ENTRY; - - /* I expect to find him, so only take a read lock */ - read_lock(&kgnilnd_data.kgn_peer_conn_lock); - peer = kgnilnd_find_peer_locked(nid); - if (peer != NULL) { - /* LIE if in a quiesce - we will update the timeouts after, - * but we don't want sends failing during it */ - if (kgnilnd_data.kgn_quiesce_trigger) { - *when = jiffies; - read_unlock(&kgnilnd_data.kgn_peer_conn_lock); - GOTO(out, 0); - } - - /* Update to best guess, might refine on later checks */ - *when = peer->gnp_last_alive; - - /* we have a peer, how about a conn? */ - conn = kgnilnd_find_conn_locked(peer); - - if (conn == NULL) { - /* if there is no conn, check peer last errno to see if clean disconnect - * - if it was, we lie to LNet because we believe a TX would complete - * on reconnect */ - if (kgnilnd_conn_clean_errno(peer->gnp_last_errno)) { - *when = jiffies; - } - /* we still want to fire a TX and new conn in this case */ - } else { - /* gnp_last_alive is valid, run for the hills */ - read_unlock(&kgnilnd_data.kgn_peer_conn_lock); - GOTO(out, 0); - } - } - /* if we get here, either we have no peer or no conn for him, so fire off - * new TX to trigger conn setup */ - read_unlock(&kgnilnd_data.kgn_peer_conn_lock); - - /* if we couldn't find him, we'll fire up a TX and get connected - - * if we don't do this, after ni_peer_timeout, LNet will declare him dead. - * So really we treat kgnilnd_query as a bit of a 'connect now' type - * event because it'll only do this when it wants to send - * - * Use a real TX for this to get the proper gnp_tx_queue behavior, etc - * normally we'd use kgnilnd_send_ctlmsg for this, but we don't really - * care that this goes out quickly since we already know we need a new conn - * formed */ - if (CFS_FAIL_CHECK(CFS_FAIL_GNI_NOOP_SEND)) - return; - - tx = kgnilnd_new_tx_msg(GNILND_MSG_NOOP, ni->ni_nid); - if (tx != NULL) { - kgnilnd_launch_tx(tx, net, &id); - } -out: - CDEBUG(D_NETTRACE, "peer 0x%p->%s when %lu\n", peer, - libcfs_nid2str(nid), *when); - EXIT; -} - int kgnilnd_dev_init(kgn_device_t *dev) { @@ -2301,7 +2231,8 @@ int kgnilnd_base_startup(void) /* OK to call kgnilnd_api_shutdown() to cleanup now */ kgnilnd_data.kgn_init = GNILND_INIT_DATA; - try_module_get(THIS_MODULE); + if (!try_module_get(THIS_MODULE)) + GOTO(failed, rc = -ENOENT); rwlock_init(&kgnilnd_data.kgn_peer_conn_lock); @@ -2532,8 +2463,7 @@ kgnilnd_base_shutdown(void) CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, "Waiting for conns to be cleaned up %d\n",atomic_read(&kgnilnd_data.kgn_nconns)); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout_uninterruptible(cfs_time_seconds(1)); } /* Peer state all cleaned up BEFORE setting shutdown, so threads don't * have to worry about shutdown races. NB connections may be created @@ -2552,8 +2482,7 @@ kgnilnd_base_shutdown(void) i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, "Waiting for ruhroh thread to terminate\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout_uninterruptible(cfs_time_seconds(1)); } /* Flag threads to terminate */ @@ -2585,8 +2514,7 @@ kgnilnd_base_shutdown(void) CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ "Waiting for %d threads to terminate\n", atomic_read(&kgnilnd_data.kgn_nthreads)); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout_uninterruptible(cfs_time_seconds(1)); } LASSERTF(atomic_read(&kgnilnd_data.kgn_npeers) == 0, @@ -2677,7 +2605,7 @@ kgnilnd_base_shutdown(void) } int -kgnilnd_startup(lnet_ni_t *ni) +kgnilnd_startup(struct lnet_ni *ni) { int rc, devno; kgn_net_t *net; @@ -2775,7 +2703,7 @@ kgnilnd_startup(lnet_ni_t *ni) } void -kgnilnd_shutdown(lnet_ni_t *ni) +kgnilnd_shutdown(struct lnet_ni *ni) { kgn_net_t *net = ni->ni_data; int i; @@ -2837,8 +2765,7 @@ kgnilnd_shutdown(lnet_ni_t *ni) "Waiting for %d references to clear on net %d\n", atomic_read(&net->gnn_refcount), net->gnn_netnum); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout_uninterruptible(cfs_time_seconds(1)); } /* release ref from kgnilnd_startup */ @@ -2891,7 +2818,7 @@ static int __init kgnilnd_init(void) if (rc != 0) return rc; - printk(KERN_INFO "Lustre: kgnilnd build version: "KGNILND_BUILD_REV"\n"); + LCONSOLE_INFO("Lustre: kgnilnd build version: "LUSTRE_VERSION_STRING"\n"); kgnilnd_insert_sysctl(); kgnilnd_proc_init(); @@ -2903,7 +2830,7 @@ static int __init kgnilnd_init(void) MODULE_AUTHOR("Cray, Inc. "); MODULE_DESCRIPTION("Gemini LNet Network Driver"); -MODULE_VERSION(KGNILND_BUILD_REV); +MODULE_VERSION(LUSTRE_VERSION_STRING); MODULE_LICENSE("GPL"); module_init(kgnilnd_init);