#include <linux/sched/signal.h>
#endif
+#include <lnet/udsp.h>
#include <lnet/lib-lnet.h>
#define D_LNI D_CONSOLE
module_param(rnet_htable_size, int, 0444);
MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
-static int use_tcp_bonding = false;
+static int use_tcp_bonding;
module_param(use_tcp_bonding, int, 0444);
MODULE_PARM_DESC(use_tcp_bonding,
- "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
+ "use_tcp_bonding parameter has been removed");
unsigned int lnet_numa_range = 0;
module_param(lnet_numa_range, uint, 0444);
MODULE_PARM_DESC(lnet_recovery_interval,
"Interval to recover unhealthy interfaces in seconds");
+unsigned int lnet_recovery_limit;
+module_param(lnet_recovery_limit, uint, 0644);
+MODULE_PARM_DESC(lnet_recovery_limit,
+ "How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery");
+
static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp);
MODULE_PARM_DESC(lnet_drop_asym_route,
"Set to 1 to drop asymmetrical route messages.");
-#define LNET_TRANSACTION_TIMEOUT_NO_HEALTH_DEFAULT 50
-#define LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT 50
-
-unsigned lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT;
+#define LNET_TRANSACTION_TIMEOUT_DEFAULT 50
+unsigned int lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_DEFAULT;
static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_transaction_timeout = {
MODULE_PARM_DESC(lnet_transaction_timeout,
"Maximum number of seconds to wait for a peer response.");
-#define LNET_RETRY_COUNT_HEALTH_DEFAULT 2
-unsigned lnet_retry_count = LNET_RETRY_COUNT_HEALTH_DEFAULT;
+#define LNET_RETRY_COUNT_DEFAULT 2
+unsigned int lnet_retry_count = LNET_RETRY_COUNT_DEFAULT;
static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_retry_count = {
MODULE_PARM_DESC(lnet_retry_count,
"Maximum number of times to retry transmitting a message");
+unsigned int lnet_response_tracking = 3;
+static int response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp);
+
+#ifdef HAVE_KERNEL_PARAM_OPS
+static struct kernel_param_ops param_ops_response_tracking = {
+ .set = response_tracking_set,
+ .get = param_get_int,
+};
+
+#define param_check_response_tracking(name, p) \
+ __param_check(name, p, int)
+module_param(lnet_response_tracking, response_tracking, 0644);
+#else
+module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
+ &lnet_response_tracking, 0644);
+#endif
+MODULE_PARM_DESC(lnet_response_tracking,
+ "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
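+/* A usage sketch, assuming the standard sysfs path for a 0644 module
+ * parameter of the lnet module:
+ *
+ *   echo 1 > /sys/module/lnet/parameters/lnet_response_tracking
+ *
+ * would restrict response tracking to GET replies only.
+ */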
+
+#define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
+ (LNET_RETRY_COUNT_DEFAULT + 1))
+unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
+static void lnet_set_lnd_timeout(void)
+{
+ lnet_lnd_timeout = (lnet_transaction_timeout - 1) /
+ (lnet_retry_count + 1);
+}
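+/* Worked example with the defaults above: lnet_transaction_timeout = 50
+ * and lnet_retry_count = 2 give lnet_lnd_timeout = (50 - 1) / (2 + 1) = 16
+ * seconds per LND-level attempt, so the original send plus two retries
+ * (3 x 16 = 48 seconds) fit inside one 50 second transaction timeout.
+ */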
-unsigned lnet_lnd_timeout = LNET_LND_DEFAULT_TIMEOUT;
unsigned int lnet_current_net_count;
/*
return -EINVAL;
}
- /*
- * if we're turning on health then use the health timeout
- * defaults.
- */
- if (*sensitivity == 0 && value != 0) {
- lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT;
- lnet_retry_count = LNET_RETRY_COUNT_HEALTH_DEFAULT;
- /*
- * if we're turning off health then use the no health timeout
- * default.
- */
- } else if (*sensitivity != 0 && value == 0) {
- lnet_transaction_timeout =
- LNET_TRANSACTION_TIMEOUT_NO_HEALTH_DEFAULT;
+ if (*sensitivity != 0 && value == 0 && lnet_retry_count != 0) {
lnet_retry_count = 0;
+ lnet_set_lnd_timeout();
}
*sensitivity = value;
discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
{
int rc;
- unsigned *discovery = (unsigned *)kp->arg;
+ unsigned *discovery_off = (unsigned *)kp->arg;
unsigned long value;
struct lnet_ping_buffer *pbuf;
*/
mutex_lock(&the_lnet.ln_api_mutex);
- if (value == *discovery) {
+ if (value == *discovery_off) {
mutex_unlock(&the_lnet.ln_api_mutex);
return 0;
}
* updating the peers
*/
if (the_lnet.ln_state != LNET_STATE_RUNNING) {
- *discovery = value;
+ *discovery_off = value;
mutex_unlock(&the_lnet.ln_api_mutex);
return 0;
}
pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
lnet_net_unlock(LNET_LOCK_EX);
- /*
- * Always update the peers. This will result in a push to the
- * peers with the updated capabilities feature mask. The peer can
- * then take appropriate action to update its representation of
- * the node.
- *
- * If discovery is already off, turn it on first before pushing
- * the update. The discovery flag must be on before pushing.
- * otherwise if the flag is on and we're turning it off then push
- * first before turning the flag off. In the former case the flag
- * is being set twice, but I find it's better to do that rather
- * than have duplicate code in an if/else statement.
- */
- if (*discovery > 0 && value == 0)
- *discovery = value;
- lnet_push_update_to_peers(1);
- *discovery = value;
+ /* only send a push when we're turning off discovery */
+ if (*discovery_off <= 0 && value > 0)
+ lnet_push_update_to_peers(1);
+ *discovery_off = value;
mutex_unlock(&the_lnet.ln_api_mutex);
*/
mutex_lock(&the_lnet.ln_api_mutex);
- if (value < lnet_retry_count || value == 0) {
+ if (value <= lnet_retry_count || value == 0) {
mutex_unlock(&the_lnet.ln_api_mutex);
CERROR("Invalid value for lnet_transaction_timeout (%lu). "
"Has to be greater than lnet_retry_count (%u)\n",
}
*transaction_to = value;
- if (lnet_retry_count == 0)
- lnet_lnd_timeout = value;
- else
- lnet_lnd_timeout = value / lnet_retry_count;
+ /* Update the lnet_lnd_timeout now that we've modified the
+ * transaction timeout
+ */
+ lnet_set_lnd_timeout();
mutex_unlock(&the_lnet.ln_api_mutex);
*/
mutex_lock(&the_lnet.ln_api_mutex);
- if (lnet_health_sensitivity == 0) {
+ if (lnet_health_sensitivity == 0 && value > 0) {
mutex_unlock(&the_lnet.ln_api_mutex);
- CERROR("Can not set retry_count when health feature is turned off\n");
+ CERROR("Can not set lnet_retry_count when health feature is turned off\n");
return -EINVAL;
}
*retry_count = value;
- if (value == 0)
- lnet_lnd_timeout = lnet_transaction_timeout;
- else
- lnet_lnd_timeout = lnet_transaction_timeout / value;
+ /* Update the lnet_lnd_timeout now that we've modified the
+ * retry count
+ */
+ lnet_set_lnd_timeout();
mutex_unlock(&the_lnet.ln_api_mutex);
return 0;
}
-static char *
+static int
+response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp)
+{
+ int rc;
+ unsigned long new_value;
+
+ rc = kstrtoul(val, 0, &new_value);
+ if (rc) {
+ CERROR("Invalid value for 'lnet_response_tracking'\n");
+ return -EINVAL;
+ }
+
+	if (new_value > 3) {
+ CWARN("Invalid value (%lu) for 'lnet_response_tracking'\n",
+ new_value);
+ return -EINVAL;
+ }
+
+ lnet_response_tracking = new_value;
+
+ return 0;
+}
+
+static const char *
lnet_get_routes(void)
{
return routes;
}
-static char *
+static const char *
lnet_get_networks(void)
{
- char *nets;
- int rc;
+ const char *nets;
+ int rc;
if (*networks != 0 && *ip2nets != 0) {
LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
struct kmem_cache *lnet_mes_cachep; /* MEs kmem_cache */
struct kmem_cache *lnet_small_mds_cachep; /* <= LNET_SMALL_MD_SIZE bytes
* MDs kmem_cache */
+struct kmem_cache *lnet_udsp_cachep; /* udsp cache */
struct kmem_cache *lnet_rspt_cachep; /* response tracker cache */
struct kmem_cache *lnet_msg_cachep;
if (!lnet_small_mds_cachep)
return -ENOMEM;
+ lnet_udsp_cachep = kmem_cache_create("lnet_udsp",
+ sizeof(struct lnet_udsp),
+ 0, 0, NULL);
+ if (!lnet_udsp_cachep)
+ return -ENOMEM;
+
lnet_rspt_cachep = kmem_cache_create("lnet_rspt", sizeof(struct lnet_rsp_tracker),
0, 0, NULL);
if (!lnet_rspt_cachep)
lnet_msg_cachep = NULL;
}
-
if (lnet_rspt_cachep) {
kmem_cache_destroy(lnet_rspt_cachep);
lnet_rspt_cachep = NULL;
}
+ if (lnet_udsp_cachep) {
+ kmem_cache_destroy(lnet_udsp_cachep);
+ lnet_udsp_cachep = NULL;
+ }
+
if (lnet_small_mds_cachep) {
kmem_cache_destroy(lnet_small_mds_cachep);
lnet_small_mds_cachep = NULL;
/* Wire protocol assertions generated by 'wirecheck'
* running on Linux robert.bartonsoftware.com 2.6.8-1.521
* #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
- * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
+ * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
+ */
/* Constants... */
BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
BUILD_BUG_ON(LNET_MSG_REPLY != 3);
BUILD_BUG_ON(LNET_MSG_HELLO != 4);
+ BUILD_BUG_ON((int)sizeof(lnet_nid_t) != 8);
+ BUILD_BUG_ON((int)sizeof(lnet_pid_t) != 4);
+
+ /* Checks for struct lnet_process_id_packed */
+ BUILD_BUG_ON((int)sizeof(struct lnet_process_id_packed) != 12);
+ BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, nid) != 0);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->nid) != 8);
+ BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, pid) != 8);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->pid) != 4);
+
/* Checks for struct lnet_handle_wire */
BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) != 4);
BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_ni) != 16);
BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_ni) != 0);
+
+ /* Acceptor connection request */
+ BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
+
+ /* Checks for struct lnet_acceptor_connreq */
+ BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq) != 16);
+ BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_magic) != 0);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_magic) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_version) != 4);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_version) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_nid) != 8);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_nid) != 8);
+
+ /* Checks for struct lnet_counters_common */
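+	/* The layout being locked down here: seven 32-bit counters
+	 * (28 bytes) followed by four 64-bit byte counts (32 bytes),
+	 * 60 bytes total with no padding
+	 */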
+ BUILD_BUG_ON((int)sizeof(struct lnet_counters_common) != 60);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_alloc) != 0);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_alloc) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_max) != 4);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_max) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_errors) != 8);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_errors) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_count) != 12);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_count) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_count) != 16);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_count) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_count) != 20);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_count) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_count) != 24);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_count) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_length) != 28);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_length) != 8);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_length) != 36);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_length) != 8);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_length) != 44);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_length) != 8);
+ BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_length) != 52);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_length) != 8);
}
static const struct lnet_lnd *lnet_find_lnd_by_type(__u32 type)
}
EXPORT_SYMBOL(lnet_unregister_lnd);
-void
-lnet_counters_get_common(struct lnet_counters_common *common)
+static void
+lnet_counters_get_common_locked(struct lnet_counters_common *common)
{
struct lnet_counters *ctr;
int i;
+	/* FIXME !!! There is no assert_lnet_net_locked() to ensure this
+	 * is actually called under the protection of the lnet_net_lock.
+	 */
memset(common, 0, sizeof(*common));
- lnet_net_lock(LNET_LOCK_EX);
-
cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
common->lcc_msgs_max += ctr->lct_common.lcc_msgs_max;
common->lcc_msgs_alloc += ctr->lct_common.lcc_msgs_alloc;
common->lcc_route_length += ctr->lct_common.lcc_route_length;
common->lcc_drop_length += ctr->lct_common.lcc_drop_length;
}
+}
+
+void
+lnet_counters_get_common(struct lnet_counters_common *common)
+{
+ lnet_net_lock(LNET_LOCK_EX);
+ lnet_counters_get_common_locked(common);
lnet_net_unlock(LNET_LOCK_EX);
}
EXPORT_SYMBOL(lnet_counters_get_common);
-void
+int
lnet_counters_get(struct lnet_counters *counters)
{
struct lnet_counters *ctr;
struct lnet_counters_health *health = &counters->lct_health;
- int i;
+ int i, rc = 0;
memset(counters, 0, sizeof(*counters));
- lnet_counters_get_common(&counters->lct_common);
-
lnet_net_lock(LNET_LOCK_EX);
+ if (the_lnet.ln_state != LNET_STATE_RUNNING)
+ GOTO(out_unlock, rc = -ENODEV);
+
+ lnet_counters_get_common_locked(&counters->lct_common);
+
cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
health->lch_rst_alloc += ctr->lct_health.lch_rst_alloc;
health->lch_resend_count += ctr->lct_health.lch_resend_count;
health->lch_network_timeout_count +=
ctr->lct_health.lch_network_timeout_count;
}
+out_unlock:
lnet_net_unlock(LNET_LOCK_EX);
+ return rc;
}
EXPORT_SYMBOL(lnet_counters_get);
lnet_net_lock(LNET_LOCK_EX);
+ if (the_lnet.ln_state != LNET_STATE_RUNNING)
+ goto avoid_reset;
+
cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
memset(counters, 0, sizeof(struct lnet_counters));
-
+avoid_reset:
lnet_net_unlock(LNET_LOCK_EX);
}
INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
+ INIT_LIST_HEAD(&the_lnet.ln_udsp_list);
init_waitqueue_head(&the_lnet.ln_dc_waitq);
- the_lnet.ln_mt_eq = NULL;
+ the_lnet.ln_mt_handler = NULL;
init_completion(&the_lnet.ln_started);
rc = lnet_slab_setup();
the_lnet.ln_mt_zombie_rstqs = NULL;
}
- if (the_lnet.ln_mt_eq) {
- LNetEQFree(the_lnet.ln_mt_eq);
- the_lnet.ln_mt_eq = NULL;
- }
+ lnet_assert_handler_unused(the_lnet.ln_mt_handler);
+ the_lnet.ln_mt_handler = NULL;
lnet_portals_destroy();
the_lnet.ln_counters = NULL;
}
lnet_destroy_remote_nets_table();
+ lnet_udsp_destroy(true);
lnet_slab_cleanup();
return 0;
return NULL;
}
+void
+lnet_net_clr_pref_rtrs(struct lnet_net *net)
+{
+ struct list_head zombies;
+ struct lnet_nid_list *ne;
+ struct lnet_nid_list *tmp;
+
+ INIT_LIST_HEAD(&zombies);
+
+ lnet_net_lock(LNET_LOCK_EX);
+ list_splice_init(&net->net_rtr_pref_nids, &zombies);
+ lnet_net_unlock(LNET_LOCK_EX);
+
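+	/* Free the zombies outside the net lock; splicing onto the
+	 * local list above lets us drop LNET_LOCK_EX before calling
+	 * LIBCFS_FREE on each entry
+	 */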
+ list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
+ list_del_init(&ne->nl_list);
+ LIBCFS_FREE(ne, sizeof(*ne));
+ }
+}
+
+int
+lnet_net_add_pref_rtr(struct lnet_net *net,
+ lnet_nid_t gw_nid)
+__must_hold(&the_lnet.ln_api_mutex)
+{
+ struct lnet_nid_list *ne;
+
+ /* This function is called with api_mutex held. When the api_mutex
+ * is held the list can not be modified, as it is only modified as
+ * a result of applying a UDSP and that happens under api_mutex
+ * lock.
+ */
+ list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
+ if (ne->nl_nid == gw_nid)
+ return -EEXIST;
+ }
+
+ LIBCFS_ALLOC(ne, sizeof(*ne));
+ if (!ne)
+ return -ENOMEM;
+
+ ne->nl_nid = gw_nid;
+
+ /* Lock the cpt to protect against addition and checks in the
+ * selection algorithm
+ */
+ lnet_net_lock(LNET_LOCK_EX);
+ list_add(&ne->nl_list, &net->net_rtr_pref_nids);
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ return 0;
+}
+
+bool
+lnet_net_is_pref_rtr_locked(struct lnet_net *net, lnet_nid_t rtr_nid)
+{
+ struct lnet_nid_list *ne;
+
+	CDEBUG(D_NET, "%s: rtr pref empty: %d\n",
+ libcfs_net2str(net->net_id),
+ list_empty(&net->net_rtr_pref_nids));
+
+ if (list_empty(&net->net_rtr_pref_nids))
+ return false;
+
+ list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
+ CDEBUG(D_NET, "Comparing pref %s with gw %s\n",
+ libcfs_nid2str(ne->nl_nid),
+ libcfs_nid2str(rtr_nid));
+ if (rtr_nid == ne->nl_nid)
+ return true;
+ }
+
+ return false;
+}
+
unsigned int
lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
{
/* Loopback is guaranteed to be present */
if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
return -ERANGE;
- if (LNET_NETTYP(LNET_NIDNET(LNET_PING_INFO_LONI(pinfo))) != LOLND)
+ if (LNET_PING_INFO_LONI(pinfo) != LNET_NID_LO_0)
return -EPROTO;
return 0;
}
static void
lnet_ping_target_event_handler(struct lnet_event *event)
{
- struct lnet_ping_buffer *pbuf = event->md.user_ptr;
+ struct lnet_ping_buffer *pbuf = event->md_user_ptr;
if (event->unlinked)
lnet_ping_buffer_decref(pbuf);
struct lnet_md md = { NULL };
int rc;
- if (set_eq) {
- the_lnet.ln_ping_target_eq =
- LNetEQAlloc(lnet_ping_target_event_handler);
- if (IS_ERR(the_lnet.ln_ping_target_eq)) {
- rc = PTR_ERR(the_lnet.ln_ping_target_eq);
- CERROR("Can't allocate ping buffer EQ: %d\n", rc);
- return rc;
- }
- }
+ if (set_eq)
+ the_lnet.ln_ping_target_handler =
+ lnet_ping_target_event_handler;
*ppbuf = lnet_ping_target_create(ni_count);
if (*ppbuf == NULL) {
md.max_size = 0;
md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
LNET_MD_MANAGE_REMOTE;
- md.eq_handle = the_lnet.ln_ping_target_eq;
+ md.handler = the_lnet.ln_ping_target_handler;
md.user_ptr = *ppbuf;
- rc = LNetMDAttach(me, md, LNET_RETAIN, ping_mdh);
+ rc = LNetMDAttach(me, &md, LNET_RETAIN, ping_mdh);
if (rc != 0) {
CERROR("Can't attach ping target MD: %d\n", rc);
- goto fail_unlink_ping_me;
+ goto fail_decref_ping_buffer;
}
lnet_ping_buffer_addref(*ppbuf);
return 0;
-fail_unlink_ping_me:
- LNetMEUnlink(me);
fail_decref_ping_buffer:
LASSERT(atomic_read(&(*ppbuf)->pb_refcnt) == 1);
lnet_ping_buffer_decref(*ppbuf);
*ppbuf = NULL;
fail_free_eq:
- if (set_eq)
- LNetEQFree(the_lnet.ln_ping_target_eq);
-
return rc;
}
lnet_ping_md_unlink(the_lnet.ln_ping_target,
&the_lnet.ln_ping_target_md);
- LNetEQFree(the_lnet.ln_ping_target_eq);
-
+ lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
lnet_ping_target_destroy();
}
md.max_size = 0;
md.options = LNET_MD_OP_PUT | LNET_MD_TRUNCATE;
md.user_ptr = pbuf;
- md.eq_handle = the_lnet.ln_push_target_eq;
+ md.handler = the_lnet.ln_push_target_handler;
- rc = LNetMDAttach(me, md, LNET_UNLINK, mdhp);
+ rc = LNetMDAttach(me, &md, LNET_UNLINK, mdhp);
if (rc) {
CERROR("Can't attach push MD: %d\n", rc);
- LNetMEUnlink(me);
lnet_ping_buffer_decref(pbuf);
pbuf->pb_needs_post = true;
return rc;
static void lnet_push_target_event_handler(struct lnet_event *ev)
{
- struct lnet_ping_buffer *pbuf = ev->md.user_ptr;
+ struct lnet_ping_buffer *pbuf = ev->md_user_ptr;
CDEBUG(D_NET, "type %d status %d unlinked %d\n", ev->type, ev->status,
ev->unlinked);
if (the_lnet.ln_push_target)
return -EALREADY;
- the_lnet.ln_push_target_eq =
- LNetEQAlloc(lnet_push_target_event_handler);
- if (IS_ERR(the_lnet.ln_push_target_eq)) {
- rc = PTR_ERR(the_lnet.ln_push_target_eq);
- CERROR("Can't allocated push target EQ: %d\n", rc);
- return rc;
- }
+ the_lnet.ln_push_target_handler =
+ lnet_push_target_event_handler;
rc = LNetSetLazyPortal(LNET_RESERVED_PORTAL);
LASSERT(rc == 0);
if (rc) {
LNetClearLazyPortal(LNET_RESERVED_PORTAL);
- LNetEQFree(the_lnet.ln_push_target_eq);
- the_lnet.ln_push_target_eq = NULL;
+ the_lnet.ln_push_target_handler = NULL;
}
return rc;
the_lnet.ln_push_target_nnis = 0;
LNetClearLazyPortal(LNET_RESERVED_PORTAL);
- LNetEQFree(the_lnet.ln_push_target_eq);
- the_lnet.ln_push_target_eq = NULL;
+ lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
+ the_lnet.ln_push_target_handler = NULL;
}
static int
}
if (!list_empty(&ni->ni_netlist)) {
+		/* Unlock the mutex while waiting so that other
+		 * threads can read the LNet state, and to avoid
+		 * deadlock
+		 */
lnet_net_unlock(LNET_LOCK_EX);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
++i;
if ((i & (-i)) == i) {
CDEBUG(D_WARNING,
libcfs_nid2str(ni->ni_nid));
}
schedule_timeout_uninterruptible(cfs_time_seconds(1));
+
+ mutex_lock(&the_lnet.ln_api_mutex);
lnet_net_lock(LNET_LOCK_EX);
continue;
}
* After than we want to delete the network being added,
* to avoid a memory leak.
*/
-
- /*
- * When a network uses TCP bonding then all its interfaces
- * must be specified when the network is first defined: the
- * TCP bonding code doesn't allow for interfaces to be added
- * or removed.
- */
- if (net_l != net && net_l != NULL && use_tcp_bonding &&
- LNET_NETTYP(net_l->net_id) == SOCKLND) {
- rc = -EINVAL;
- goto failed0;
- }
-
while (!list_empty(&net->net_ni_added)) {
ni = list_entry(net->net_ni_added.next, struct lnet_ni,
ni_netlist);
		/* make sure that the NI we're about to start
		 * up is actually unique. If it's not, fail. */
if (!lnet_ni_unique_net(&net_l->net_ni_list,
- ni->ni_interfaces[0])) {
+ ni->ni_interface)) {
rc = -EEXIST;
goto failed1;
}
goto err_empty_list;
}
+ if (use_tcp_bonding)
+ CWARN("use_tcp_bonding has been removed. Use Multi-Rail and Dynamic Discovery instead, see LU-13641\n");
+
/* If LNet is being initialized via DLC it is possible
* that the user requests not to load module parameters (ones which
* are supported by DLC) on initialization. Therefore, make sure not
* in this case. On cleanup in case of failure only clean up
* routes if it has been loaded */
if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_networks(&net_head, lnet_get_networks(),
- use_tcp_bonding);
+ rc = lnet_parse_networks(&net_head, lnet_get_networks());
if (rc < 0)
goto err_empty_list;
}
lnet_ping_target_update(pbuf, ping_mdh);
- the_lnet.ln_mt_eq = LNetEQAlloc(lnet_mt_event_handler);
- if (IS_ERR(the_lnet.ln_mt_eq)) {
- rc = PTR_ERR(the_lnet.ln_mt_eq);
- CERROR("Can't allocate monitor thread EQ: %d\n", rc);
- goto err_stop_ping;
- }
+ the_lnet.ln_mt_handler = lnet_mt_event_handler;
rc = lnet_push_target_init();
if (rc != 0)
if (!ni || !cfg_ni || !tun)
return;
- if (ni->ni_interfaces[0] != NULL) {
- for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
- if (ni->ni_interfaces[i] != NULL) {
- strncpy(cfg_ni->lic_ni_intf[i],
- ni->ni_interfaces[i],
- sizeof(cfg_ni->lic_ni_intf[i]));
- }
- }
+ if (ni->ni_interface != NULL) {
+ strncpy(cfg_ni->lic_ni_intf,
+ ni->ni_interface,
+ sizeof(cfg_ni->lic_ni_intf));
}
cfg_ni->lic_nid = ni->ni_nid;
- if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
+ if (ni->ni_nid == LNET_NID_LO_0)
cfg_ni->lic_status = LNET_NI_STATUS_UP;
else
cfg_ni->lic_status = ni->ni_status->ns_status;
- cfg_ni->lic_tcp_bonding = use_tcp_bonding;
cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
if (!net_config)
return;
- BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
- ARRAY_SIZE(net_config->ni_interfaces));
-
- for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
- if (!ni->ni_interfaces[i])
- break;
+ if (!ni->ni_interface)
+ return;
- strncpy(net_config->ni_interfaces[i],
- ni->ni_interfaces[i],
- sizeof(net_config->ni_interfaces[i]));
- }
+ strncpy(net_config->ni_interface,
+ ni->ni_interface,
+ sizeof(net_config->ni_interface));
config->cfg_nid = ni->ni_nid;
config->cfg_config_u.cfg_net.net_peer_timeout =
config->cfg_config_u.cfg_net.net_peer_rtr_credits =
ni->ni_net->net_tunables.lct_peer_rtr_credits;
- if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
+ if (ni->ni_nid == LNET_NID_LO_0)
net_config->ni_status = LNET_NI_STATUS_UP;
else
net_config->ni_status = ni->ni_status->ns_status;
return NULL;
}
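+/* Return the best (highest) health value across all NIs on @net.
+ * The _locked suffix implies this is called with the lnet_net_lock
+ * held by the caller.
+ */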
+int lnet_get_net_healthv_locked(struct lnet_net *net)
+{
+ struct lnet_ni *ni;
+ int best_healthv = 0;
+ int healthv;
+
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ healthv = atomic_read(&ni->ni_healthv);
+ if (healthv > best_healthv)
+ best_healthv = healthv;
+ }
+
+ return best_healthv;
+}
+
struct lnet_ni *
lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
{
static int lnet_add_net_common(struct lnet_net *net,
struct lnet_ioctl_config_lnd_tunables *tun)
{
- __u32 net_id;
+ struct lnet_handle_md ping_mdh;
struct lnet_ping_buffer *pbuf;
- struct lnet_handle_md ping_mdh;
- int rc;
struct lnet_remotenet *rnet;
- int net_ni_count;
+ struct lnet_ni *ni;
+ int net_ni_count;
+ __u32 net_id;
+ int rc;
lnet_net_lock(LNET_LOCK_EX);
rnet = lnet_find_rnet_locked(net->net_id);
lnet_net_lock(LNET_LOCK_EX);
net = lnet_get_net_locked(net_id);
- lnet_net_unlock(LNET_LOCK_EX);
-
LASSERT(net);
+ /* apply the UDSPs */
+ rc = lnet_udsp_apply_policies_on_net(net);
+ if (rc)
+ CERROR("Failed to apply UDSPs on local net %s\n",
+ libcfs_net2str(net->net_id));
+
+	/* At this point we have lost track of which NI was just added,
+	 * so we re-apply the policies on all of the NIs on this net
+	 */
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ rc = lnet_udsp_apply_policies_on_ni(ni);
+ if (rc)
+ CERROR("Failed to apply UDSPs on ni %s\n",
+ libcfs_nid2str(ni->ni_nid));
+ }
+ lnet_net_unlock(LNET_LOCK_EX);
+
/*
* Start the acceptor thread if this is the first network
* being added that requires the thread.
return rc;
}
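+/* Fill in the compile-time defaults for any common tunables still at
+ * zero, so callers may pass partially initialized tunables. This
+ * mirrors the explicit defaulting done for the legacy ioctl path
+ * further below.
+ */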
+static void
+lnet_set_tune_defaults(struct lnet_ioctl_config_lnd_tunables *tun)
+{
+ if (tun) {
+ if (!tun->lt_cmn.lct_peer_timeout)
+ tun->lt_cmn.lct_peer_timeout = DEFAULT_PEER_TIMEOUT;
+ if (!tun->lt_cmn.lct_peer_tx_credits)
+ tun->lt_cmn.lct_peer_tx_credits = DEFAULT_PEER_CREDITS;
+ if (!tun->lt_cmn.lct_max_tx_credits)
+ tun->lt_cmn.lct_max_tx_credits = DEFAULT_CREDITS;
+ }
+}
+
static int lnet_handle_legacy_ip2nets(char *ip2nets,
struct lnet_ioctl_config_lnd_tunables *tun)
{
struct lnet_net *net;
- char *nets;
+ const char *nets;
int rc;
LIST_HEAD(net_head);
if (rc < 0)
return rc;
- rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
+ rc = lnet_parse_networks(&net_head, nets);
if (rc < 0)
return rc;
+ lnet_set_tune_defaults(tun);
+
mutex_lock(&the_lnet.ln_api_mutex);
while (!list_empty(&net_head)) {
net = list_entry(net_head.next, struct lnet_net, net_list);
}
ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
- conf->lic_ni_intf[0]);
+ conf->lic_ni_intf);
if (!ni)
return -ENOMEM;
+ lnet_set_tune_defaults(tun);
+
mutex_lock(&the_lnet.ln_api_mutex);
rc = lnet_add_net_common(net, tun);
LIST_HEAD(net_head);
int rc;
struct lnet_ioctl_config_lnd_tunables tun;
- char *nets = conf->cfg_config_u.cfg_net.net_intf;
+ const char *nets = conf->cfg_config_u.cfg_net.net_intf;
/* Create a net/ni structures for the network string */
- rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
+ rc = lnet_parse_networks(&net_head, nets);
if (rc <= 0)
return rc == 0 ? -EINVAL : rc;
memset(&tun, 0, sizeof(tun));
tun.lt_cmn.lct_peer_timeout =
- conf->cfg_config_u.cfg_net.net_peer_timeout;
+ (!conf->cfg_config_u.cfg_net.net_peer_timeout) ? DEFAULT_PEER_TIMEOUT :
+ conf->cfg_config_u.cfg_net.net_peer_timeout;
tun.lt_cmn.lct_peer_tx_credits =
- conf->cfg_config_u.cfg_net.net_peer_tx_credits;
+ (!conf->cfg_config_u.cfg_net.net_peer_tx_credits) ? DEFAULT_PEER_CREDITS :
+ conf->cfg_config_u.cfg_net.net_peer_tx_credits;
tun.lt_cmn.lct_peer_rtr_credits =
conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
tun.lt_cmn.lct_max_tx_credits =
- conf->cfg_config_u.cfg_net.net_max_tx_credits;
+ (!conf->cfg_config_u.cfg_net.net_max_tx_credits) ? DEFAULT_CREDITS :
+ conf->cfg_config_u.cfg_net.net_max_tx_credits;
rc = lnet_add_net_common(net, &tun);
return -EINVAL;
mutex_lock(&the_lnet.ln_api_mutex);
- lnet_counters_get(&lnet_stats->st_cntrs);
+ rc = lnet_counters_get(&lnet_stats->st_cntrs);
mutex_unlock(&the_lnet.ln_api_mutex);
- return 0;
+ return rc;
}
case IOC_LIBCFS_CONFIG_RTR:
return 0;
}
+ case IOC_LIBCFS_ADD_UDSP: {
+ struct lnet_ioctl_udsp *ioc_udsp = arg;
+ __u32 bulk_size = ioc_udsp->iou_hdr.ioc_len;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_udsp_demarshal_add(arg, bulk_size);
+ if (!rc) {
+ rc = lnet_udsp_apply_policies(NULL, false);
+ CDEBUG(D_NET, "policy application returned %d\n", rc);
+ rc = 0;
+ }
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+ }
+
+ case IOC_LIBCFS_DEL_UDSP: {
+ struct lnet_ioctl_udsp *ioc_udsp = arg;
+ int idx = ioc_udsp->iou_idx;
+
+ if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
+ return -EINVAL;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_udsp_del_policy(idx);
+ if (!rc) {
+ rc = lnet_udsp_apply_policies(NULL, false);
+ CDEBUG(D_NET, "policy re-application returned %d\n",
+ rc);
+ rc = 0;
+ }
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+ }
+
+ case IOC_LIBCFS_GET_UDSP_SIZE: {
+ struct lnet_ioctl_udsp *ioc_udsp = arg;
+ struct lnet_udsp *udsp;
+
+ if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
+ return -EINVAL;
+
+ rc = 0;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
+ if (!udsp) {
+ rc = -ENOENT;
+ } else {
+			/* On input iou_idx holds the index of the UDSP
+			 * to get the size of. On output iou_idx holds
+			 * the size of the UDSP found at the passed-in
+			 * index.
+			 */
+ ioc_udsp->iou_idx = lnet_get_udsp_size(udsp);
+ if (ioc_udsp->iou_idx < 0)
+ rc = -EINVAL;
+ }
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+ }
+
+ case IOC_LIBCFS_GET_UDSP: {
+ struct lnet_ioctl_udsp *ioc_udsp = arg;
+ struct lnet_udsp *udsp;
+
+ if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
+ return -EINVAL;
+
+ rc = 0;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
+ if (!udsp)
+ rc = -ENOENT;
+ else
+ rc = lnet_udsp_marshal(udsp, ioc_udsp);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+ }
+
+ case IOC_LIBCFS_GET_CONST_UDSP_INFO: {
+ struct lnet_ioctl_construct_udsp_info *info = arg;
+
+ if (info->cud_hdr.ioc_len < sizeof(*info))
+ return -EINVAL;
+
+ CDEBUG(D_NET, "GET_UDSP_INFO for %s\n",
+ libcfs_nid2str(info->cud_nid));
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ lnet_udsp_get_construct_info(info);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return 0;
+ }
+
default:
ni = lnet_net2ni_addref(data->ioc_net);
if (ni == NULL)
static void
lnet_ping_event_handler(struct lnet_event *event)
{
- struct ping_data *pd = event->md.user_ptr;
+ struct ping_data *pd = event->md_user_ptr;
CDEBUG(D_NET, "ping event (%d %d)%s\n",
event->type, event->status,
static int lnet_ping(struct lnet_process_id id, signed long timeout,
struct lnet_process_id __user *ids, int n_ids)
{
- struct lnet_eq *eq;
struct lnet_md md = { NULL };
struct ping_data pd = { 0 };
struct lnet_ping_buffer *pbuf;
if (!pbuf)
return -ENOMEM;
- eq = LNetEQAlloc(lnet_ping_event_handler);
- if (IS_ERR(eq)) {
- rc = PTR_ERR(eq);
- CERROR("Can't allocate EQ: %d\n", rc);
- goto fail_ping_buffer_decref;
- }
-
/* initialize md content */
md.start = &pbuf->pb_info;
md.length = LNET_PING_INFO_SIZE(n_ids);
md.max_size = 0;
md.options = LNET_MD_TRUNCATE;
md.user_ptr = &pd;
- md.eq_handle = eq;
+ md.handler = lnet_ping_event_handler;
init_completion(&pd.completion);
- rc = LNetMDBind(md, LNET_UNLINK, &pd.mdh);
+ rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
if (rc != 0) {
CERROR("Can't bind MD: %d\n", rc);
- goto fail_free_eq;
+ goto fail_ping_buffer_decref;
}
rc = LNetGet(LNET_NID_ANY, pd.mdh, id,
}
if (!pd.replied) {
rc = -EIO;
- goto fail_free_eq;
+ goto fail_ping_buffer_decref;
}
nob = pd.rc;
if (nob < 8) {
CERROR("%s: ping info too short %d\n",
libcfs_id2str(id), nob);
- goto fail_free_eq;
+ goto fail_ping_buffer_decref;
}
if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
} else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
CERROR("%s: Unexpected magic %08x\n",
libcfs_id2str(id), pbuf->pb_info.pi_magic);
- goto fail_free_eq;
+ goto fail_ping_buffer_decref;
}
if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
CERROR("%s: ping w/o NI status: 0x%x\n",
libcfs_id2str(id), pbuf->pb_info.pi_features);
- goto fail_free_eq;
+ goto fail_ping_buffer_decref;
}
if (nob < LNET_PING_INFO_SIZE(0)) {
CERROR("%s: Short reply %d(%d min)\n",
libcfs_id2str(id),
nob, (int)LNET_PING_INFO_SIZE(0));
- goto fail_free_eq;
+ goto fail_ping_buffer_decref;
}
if (pbuf->pb_info.pi_nnis < n_ids)
CERROR("%s: Short reply %d(%d expected)\n",
libcfs_id2str(id),
nob, (int)LNET_PING_INFO_SIZE(n_ids));
- goto fail_free_eq;
+ goto fail_ping_buffer_decref;
}
rc = -EFAULT; /* if I segv in copy_to_user()... */
tmpid.pid = pbuf->pb_info.pi_pid;
tmpid.nid = pbuf->pb_info.pi_ni[i].ns_nid;
if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
- goto fail_free_eq;
+ goto fail_ping_buffer_decref;
}
rc = pbuf->pb_info.pi_nnis;
- fail_free_eq:
- LNetEQFree(eq);
-
fail_ping_buffer_decref:
lnet_ping_buffer_decref(pbuf);
return rc;
int cpt;
int i;
int rc;
- int max_intf = lnet_interfaces_max;
if (n_ids <= 0 ||
id.nid == LNET_NID_ANY)
id.pid = LNET_PID_LUSTRE;
/*
- * if the user buffer has more space than the max_intf
- * then only fill it up to max_intf
+	 * If the user buffer has more space than lnet_interfaces_max,
+ * then only fill it up to lnet_interfaces_max.
*/
- if (n_ids > max_intf)
- n_ids = max_intf;
+ if (n_ids > lnet_interfaces_max)
+ n_ids = lnet_interfaces_max;
CFS_ALLOC_PTR_ARRAY(buf, n_ids);
if (!buf)
if (rc)
goto out_decref;
- /* Peer may have changed. */
+ /* The lpni (or lp) for this NID may have changed and our ref is
+ * the only thing keeping the old one around. Release the ref
+	 * and look up the lpni again
+ */
+ lnet_peer_ni_decref_locked(lpni);
+ lpni = lnet_find_peer_ni_locked(id.nid);
+ if (!lpni) {
+ rc = -ENOENT;
+ goto out;
+ }
lp = lpni->lpni_peer_net->lpn_peer;
- if (lp->lp_nnis < n_ids)
- n_ids = lp->lp_nnis;
i = 0;
p = NULL;
if (++i >= n_ids)
break;
}
+ rc = i;
- lnet_net_unlock(cpt);
-
- rc = -EFAULT;
- if (copy_to_user(ids, buf, n_ids * sizeof(*buf)))
- goto out_relock;
- rc = n_ids;
-out_relock:
- lnet_net_lock(cpt);
out_decref:
lnet_peer_ni_decref_locked(lpni);
out:
lnet_net_unlock(cpt);
+ if (rc >= 0)
+ if (copy_to_user(ids, buf, rc * sizeof(*buf)))
+ rc = -EFAULT;
CFS_FREE_PTR_ARRAY(buf, n_ids);
return rc;