Whamcloud - gitweb
LU-13641 socklnd: announce deprecation of 'use_tcp_bonding'
[fs/lustre-release.git] / lnet / lnet / api-ni.c
index 7534a1e..2297772 100644 (file)
@@ -74,7 +74,7 @@ MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
 static int use_tcp_bonding = false;
 module_param(use_tcp_bonding, int, 0444);
 MODULE_PARM_DESC(use_tcp_bonding,
-                "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
+                "use_tcp_bonding parameter has been deprecated");
 
 unsigned int lnet_numa_range = 0;
 module_param(lnet_numa_range, uint, 0444);
@@ -124,6 +124,11 @@ module_param_call(lnet_recovery_interval, recovery_interval_set, param_get_int,
 MODULE_PARM_DESC(lnet_recovery_interval,
                "Interval to recover unhealthy interfaces in seconds");
 
+unsigned int lnet_recovery_limit;
+module_param(lnet_recovery_limit, uint, 0644);
+MODULE_PARM_DESC(lnet_recovery_limit,
+                "How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery");
+
 static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
 static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp);
 
@@ -182,10 +187,8 @@ module_param_call(lnet_drop_asym_route, drop_asym_route_set, param_get_int,
 MODULE_PARM_DESC(lnet_drop_asym_route,
                 "Set to 1 to drop asymmetrical route messages.");
 
-#define LNET_TRANSACTION_TIMEOUT_NO_HEALTH_DEFAULT 50
-#define LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT 50
-
-unsigned lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT;
+#define LNET_TRANSACTION_TIMEOUT_DEFAULT 50
+unsigned int lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_DEFAULT;
 static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
 #ifdef HAVE_KERNEL_PARAM_OPS
 static struct kernel_param_ops param_ops_transaction_timeout = {
@@ -203,8 +206,8 @@ module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
 MODULE_PARM_DESC(lnet_transaction_timeout,
                "Maximum number of seconds to wait for a peer response.");
 
-#define LNET_RETRY_COUNT_HEALTH_DEFAULT 2
-unsigned lnet_retry_count = LNET_RETRY_COUNT_HEALTH_DEFAULT;
+#define LNET_RETRY_COUNT_DEFAULT 2
+unsigned int lnet_retry_count = LNET_RETRY_COUNT_DEFAULT;
 static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
 #ifdef HAVE_KERNEL_PARAM_OPS
 static struct kernel_param_ops param_ops_retry_count = {
@@ -222,8 +225,34 @@ module_param_call(lnet_retry_count, retry_count_set, param_get_int,
 MODULE_PARM_DESC(lnet_retry_count,
                 "Maximum number of times to retry transmitting a message");
 
+unsigned int lnet_response_tracking = 3;
+static int response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp);
+
+#ifdef HAVE_KERNEL_PARAM_OPS
+static struct kernel_param_ops param_ops_response_tracking = {
+       .set = response_tracking_set,
+       .get = param_get_int,
+};
+
+#define param_check_response_tracking(name, p)  \
+       __param_check(name, p, int)
+module_param(lnet_response_tracking, response_tracking, 0644);
+#else
+module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
+                 &lnet_response_tracking, 0644);
+#endif
+MODULE_PARM_DESC(lnet_response_tracking,
+                "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
+
+#define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
+                                 (LNET_RETRY_COUNT_DEFAULT + 1))
+unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
+/* Recompute lnet_lnd_timeout from the current lnet_transaction_timeout
+ * and lnet_retry_count: the transaction timeout less one second, divided
+ * by the number of attempts (retry count plus one).
+ */
+static void lnet_set_lnd_timeout(void)
+{
+       unsigned int attempts = lnet_retry_count + 1;
+
+       lnet_lnd_timeout = (lnet_transaction_timeout - 1) / attempts;
+}
 
-unsigned lnet_lnd_timeout = LNET_LND_DEFAULT_TIMEOUT;
 unsigned int lnet_current_net_count;
 
 /*
@@ -267,21 +296,9 @@ sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
                return -EINVAL;
        }
 
-       /*
-        * if we're turning on health then use the health timeout
-        * defaults.
-        */
-       if (*sensitivity == 0 && value != 0) {
-               lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT;
-               lnet_retry_count = LNET_RETRY_COUNT_HEALTH_DEFAULT;
-       /*
-        * if we're turning off health then use the no health timeout
-        * default.
-        */
-       } else if (*sensitivity != 0 && value == 0) {
-               lnet_transaction_timeout =
-                       LNET_TRANSACTION_TIMEOUT_NO_HEALTH_DEFAULT;
+       if (*sensitivity != 0 && value == 0 && lnet_retry_count != 0) {
                lnet_retry_count = 0;
+               lnet_set_lnd_timeout();
        }
 
        *sensitivity = value;
@@ -326,7 +343,7 @@ static int
 discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
 {
        int rc;
-       unsigned *discovery = (unsigned *)kp->arg;
+       unsigned *discovery_off = (unsigned *)kp->arg;
        unsigned long value;
        struct lnet_ping_buffer *pbuf;
 
@@ -344,7 +361,7 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
         */
        mutex_lock(&the_lnet.ln_api_mutex);
 
-       if (value == *discovery) {
+       if (value == *discovery_off) {
                mutex_unlock(&the_lnet.ln_api_mutex);
                return 0;
        }
@@ -357,7 +374,7 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
         * updating the peers
         */
        if (the_lnet.ln_state != LNET_STATE_RUNNING) {
-               *discovery = value;
+               *discovery_off = value;
                mutex_unlock(&the_lnet.ln_api_mutex);
                return 0;
        }
@@ -371,23 +388,10 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
                pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
        lnet_net_unlock(LNET_LOCK_EX);
 
-       /*
-        * Always update the peers. This will result in a push to the
-        * peers with the updated capabilities feature mask. The peer can
-        * then take appropriate action to update its representation of
-        * the node.
-        *
-        * If discovery is already off, turn it on first before pushing
-        * the update. The discovery flag must be on before pushing.
-        * otherwise if the flag is on and we're turning it off then push
-        * first before turning the flag off. In the former case the flag
-        * is being set twice, but I find it's better to do that rather
-        * than have duplicate code in an if/else statement.
-        */
-       if (*discovery > 0 && value == 0)
-               *discovery = value;
-       lnet_push_update_to_peers(1);
-       *discovery = value;
+       /* only send a push when we're turning off discovery */
+       if (*discovery_off <= 0 && value > 0)
+               lnet_push_update_to_peers(1);
+       *discovery_off = value;
 
        mutex_unlock(&the_lnet.ln_api_mutex);
 
@@ -445,7 +449,7 @@ transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
         */
        mutex_lock(&the_lnet.ln_api_mutex);
 
-       if (value < lnet_retry_count || value == 0) {
+       if (value <= lnet_retry_count || value == 0) {
                mutex_unlock(&the_lnet.ln_api_mutex);
                CERROR("Invalid value for lnet_transaction_timeout (%lu). "
                       "Has to be greater than lnet_retry_count (%u)\n",
@@ -459,10 +463,10 @@ transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
        }
 
        *transaction_to = value;
-       if (lnet_retry_count == 0)
-               lnet_lnd_timeout = value;
-       else
-               lnet_lnd_timeout = value / lnet_retry_count;
+       /* Update the lnet_lnd_timeout now that we've modified the
+        * transaction timeout
+        */
+       lnet_set_lnd_timeout();
 
        mutex_unlock(&the_lnet.ln_api_mutex);
 
@@ -488,9 +492,9 @@ retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
         */
        mutex_lock(&the_lnet.ln_api_mutex);
 
-       if (lnet_health_sensitivity == 0) {
+       if (lnet_health_sensitivity == 0 && value > 0) {
                mutex_unlock(&the_lnet.ln_api_mutex);
-               CERROR("Can not set retry_count when health feature is turned off\n");
+               CERROR("Can not set lnet_retry_count when health feature is turned off\n");
                return -EINVAL;
        }
 
@@ -504,10 +508,10 @@ retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
 
        *retry_count = value;
 
-       if (value == 0)
-               lnet_lnd_timeout = lnet_transaction_timeout;
-       else
-               lnet_lnd_timeout = lnet_transaction_timeout / value;
+       /* Update the lnet_lnd_timeout now that we've modified the
+        * retry count
+        */
+       lnet_set_lnd_timeout();
 
        mutex_unlock(&the_lnet.ln_api_mutex);
 
@@ -536,17 +540,40 @@ intf_max_set(const char *val, cfs_kernel_param_arg_t *kp)
        return 0;
 }
 
-static char *
+/* Setter for the 'lnet_response_tracking' module parameter.
+ *
+ * Parses val with kstrtoul() (base 0) and, when the result is in the
+ * range 0..3, stores it in lnet_response_tracking.  kp is not consulted;
+ * the global is written directly.
+ *
+ * Returns 0 when the value was accepted, -EINVAL when val could not be
+ * parsed or is larger than 3.
+ */
+static int
+response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp)
+{
+       int rc;
+       unsigned long new_value;
+
+       rc = kstrtoul(val, 0, &new_value);
+       if (rc) {
+               CERROR("Invalid value for 'lnet_response_tracking'\n");
+               return -EINVAL;
+       }
+
+       /* new_value is unsigned, so only the upper bound can be violated */
+       if (new_value > 3) {
+               CWARN("Invalid value (%lu) for 'lnet_response_tracking'\n",
+                     new_value);
+               return -EINVAL;
+       }
+
+       lnet_response_tracking = new_value;
+
+       return 0;
+}
+
+static const char *
 lnet_get_routes(void)
 {
        return routes;
 }
 
-static char *
+static const char *
 lnet_get_networks(void)
 {
-       char   *nets;
-       int     rc;
+       const char *nets;
+       int rc;
 
        if (*networks != 0 && *ip2nets != 0) {
                LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
@@ -888,16 +915,17 @@ lnet_unregister_lnd(const struct lnet_lnd *lnd)
 }
 EXPORT_SYMBOL(lnet_unregister_lnd);
 
-void
-lnet_counters_get_common(struct lnet_counters_common *common)
+static void
+lnet_counters_get_common_locked(struct lnet_counters_common *common)
 {
        struct lnet_counters *ctr;
        int i;
 
+       /* FIXME !!! There is no assert_lnet_net_locked() to ensure this is
+        * actually called under the protection of the lnet_net_lock.
+        */
        memset(common, 0, sizeof(*common));
 
-       lnet_net_lock(LNET_LOCK_EX);
-
        cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
                common->lcc_msgs_max     += ctr->lct_common.lcc_msgs_max;
                common->lcc_msgs_alloc   += ctr->lct_common.lcc_msgs_alloc;
@@ -911,23 +939,33 @@ lnet_counters_get_common(struct lnet_counters_common *common)
                common->lcc_route_length += ctr->lct_common.lcc_route_length;
                common->lcc_drop_length  += ctr->lct_common.lcc_drop_length;
        }
+}
+
+void
+lnet_counters_get_common(struct lnet_counters_common *common)
+{
+       lnet_net_lock(LNET_LOCK_EX);
+       lnet_counters_get_common_locked(common);
        lnet_net_unlock(LNET_LOCK_EX);
 }
 EXPORT_SYMBOL(lnet_counters_get_common);
 
-void
+int
 lnet_counters_get(struct lnet_counters *counters)
 {
        struct lnet_counters *ctr;
        struct lnet_counters_health *health = &counters->lct_health;
-       int             i;
+       int i, rc = 0;
 
        memset(counters, 0, sizeof(*counters));
 
-       lnet_counters_get_common(&counters->lct_common);
-
        lnet_net_lock(LNET_LOCK_EX);
 
+       if (the_lnet.ln_state != LNET_STATE_RUNNING)
+               GOTO(out_unlock, rc = -ENODEV);
+
+       lnet_counters_get_common_locked(&counters->lct_common);
+
        cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
                health->lch_rst_alloc    += ctr->lct_health.lch_rst_alloc;
                health->lch_resend_count += ctr->lct_health.lch_resend_count;
@@ -954,7 +992,9 @@ lnet_counters_get(struct lnet_counters *counters)
                health->lch_network_timeout_count +=
                                ctr->lct_health.lch_network_timeout_count;
        }
+out_unlock:
        lnet_net_unlock(LNET_LOCK_EX);
+       return rc;
 }
 EXPORT_SYMBOL(lnet_counters_get);
 
@@ -966,9 +1006,12 @@ lnet_counters_reset(void)
 
        lnet_net_lock(LNET_LOCK_EX);
 
+       if (the_lnet.ln_state != LNET_STATE_RUNNING)
+               goto avoid_reset;
+
        cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
                memset(counters, 0, sizeof(struct lnet_counters));
-
+avoid_reset:
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
@@ -1186,7 +1229,7 @@ lnet_prepare(lnet_pid_t requested_pid)
        INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
        INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
        init_waitqueue_head(&the_lnet.ln_dc_waitq);
-       the_lnet.ln_mt_eq = NULL;
+       the_lnet.ln_mt_handler = NULL;
        init_completion(&the_lnet.ln_started);
 
        rc = lnet_slab_setup();
@@ -1254,8 +1297,6 @@ lnet_prepare(lnet_pid_t requested_pid)
 static int
 lnet_unprepare (void)
 {
-       int rc;
-
        /* NB no LNET_LOCK since this is the last reference.  All LND instances
         * have shut down already, so it is safe to unlink and free all
         * descriptors, even those that appear committed to a network op (eg MD
@@ -1272,11 +1313,8 @@ lnet_unprepare (void)
                the_lnet.ln_mt_zombie_rstqs = NULL;
        }
 
-       if (the_lnet.ln_mt_eq) {
-               rc = LNetEQFree(the_lnet.ln_mt_eq);
-               the_lnet.ln_mt_eq = NULL;
-               LASSERT(rc == 0);
-       }
+       lnet_assert_handler_unused(the_lnet.ln_mt_handler);
+       the_lnet.ln_mt_handler = NULL;
 
        lnet_portals_destroy();
 
@@ -1650,7 +1688,7 @@ lnet_ping_info_validate(struct lnet_ping_info *pinfo)
        /* Loopback is guaranteed to be present */
        if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
                return -ERANGE;
-       if (LNET_NETTYP(LNET_NIDNET(LNET_PING_INFO_LONI(pinfo))) != LOLND)
+       if (LNET_PING_INFO_LONI(pinfo) != LNET_NID_LO_0)
                return -EPROTO;
        return 0;
 }
@@ -1680,7 +1718,7 @@ lnet_ping_target_destroy(void)
 static void
 lnet_ping_target_event_handler(struct lnet_event *event)
 {
-       struct lnet_ping_buffer *pbuf = event->md.user_ptr;
+       struct lnet_ping_buffer *pbuf = event->md_user_ptr;
 
        if (event->unlinked)
                lnet_ping_buffer_decref(pbuf);
@@ -1697,17 +1735,11 @@ lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
        };
        struct lnet_me *me;
        struct lnet_md md = { NULL };
-       int rc, rc2;
-
-       if (set_eq) {
-               the_lnet.ln_ping_target_eq =
-                       LNetEQAlloc(lnet_ping_target_event_handler);
-               if (IS_ERR(the_lnet.ln_ping_target_eq)) {
-                       rc = PTR_ERR(the_lnet.ln_ping_target_eq);
-                       CERROR("Can't allocate ping buffer EQ: %d\n", rc);
-                       return rc;
-               }
-       }
+       int rc;
+
+       if (set_eq)
+               the_lnet.ln_ping_target_handler =
+                       lnet_ping_target_event_handler;
 
        *ppbuf = lnet_ping_target_create(ni_count);
        if (*ppbuf == NULL) {
@@ -1732,29 +1764,23 @@ lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
        md.max_size  = 0;
        md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
                       LNET_MD_MANAGE_REMOTE;
-       md.eq_handle = the_lnet.ln_ping_target_eq;
+       md.handler   = the_lnet.ln_ping_target_handler;
        md.user_ptr  = *ppbuf;
 
-       rc = LNetMDAttach(me, md, LNET_RETAIN, ping_mdh);
+       rc = LNetMDAttach(me, &md, LNET_RETAIN, ping_mdh);
        if (rc != 0) {
                CERROR("Can't attach ping target MD: %d\n", rc);
-               goto fail_unlink_ping_me;
+               goto fail_decref_ping_buffer;
        }
        lnet_ping_buffer_addref(*ppbuf);
 
        return 0;
 
-fail_unlink_ping_me:
-       LNetMEUnlink(me);
 fail_decref_ping_buffer:
        LASSERT(atomic_read(&(*ppbuf)->pb_refcnt) == 1);
        lnet_ping_buffer_decref(*ppbuf);
        *ppbuf = NULL;
 fail_free_eq:
-       if (set_eq) {
-               rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
-               LASSERT(rc2 == 0);
-       }
        return rc;
 }
 
@@ -1855,14 +1881,10 @@ lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
 static void
 lnet_ping_target_fini(void)
 {
-       int             rc;
-
        lnet_ping_md_unlink(the_lnet.ln_ping_target,
                            &the_lnet.ln_ping_target_md);
 
-       rc = LNetEQFree(the_lnet.ln_ping_target_eq);
-       LASSERT(rc == 0);
-
+       lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
        lnet_ping_target_destroy();
 }
 
@@ -1950,12 +1972,11 @@ int lnet_push_target_post(struct lnet_ping_buffer *pbuf,
        md.max_size  = 0;
        md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE;
        md.user_ptr  = pbuf;
-       md.eq_handle = the_lnet.ln_push_target_eq;
+       md.handler   = the_lnet.ln_push_target_handler;
 
-       rc = LNetMDAttach(me, md, LNET_UNLINK, mdhp);
+       rc = LNetMDAttach(me, &md, LNET_UNLINK, mdhp);
        if (rc) {
                CERROR("Can't attach push MD: %d\n", rc);
-               LNetMEUnlink(me);
                lnet_ping_buffer_decref(pbuf);
                pbuf->pb_needs_post = true;
                return rc;
@@ -1968,7 +1989,7 @@ int lnet_push_target_post(struct lnet_ping_buffer *pbuf,
 
 static void lnet_push_target_event_handler(struct lnet_event *ev)
 {
-       struct lnet_ping_buffer *pbuf = ev->md.user_ptr;
+       struct lnet_ping_buffer *pbuf = ev->md_user_ptr;
 
        CDEBUG(D_NET, "type %d status %d unlinked %d\n", ev->type, ev->status,
               ev->unlinked);
@@ -1996,13 +2017,8 @@ static int lnet_push_target_init(void)
        if (the_lnet.ln_push_target)
                return -EALREADY;
 
-       the_lnet.ln_push_target_eq =
-               LNetEQAlloc(lnet_push_target_event_handler);
-       if (IS_ERR(the_lnet.ln_push_target_eq)) {
-               rc = PTR_ERR(the_lnet.ln_push_target_eq);
-               CERROR("Can't allocated push target EQ: %d\n", rc);
-               return rc;
-       }
+       the_lnet.ln_push_target_handler =
+               lnet_push_target_event_handler;
 
        rc = LNetSetLazyPortal(LNET_RESERVED_PORTAL);
        LASSERT(rc == 0);
@@ -2014,8 +2030,7 @@ static int lnet_push_target_init(void)
 
        if (rc) {
                LNetClearLazyPortal(LNET_RESERVED_PORTAL);
-               LNetEQFree(the_lnet.ln_push_target_eq);
-               the_lnet.ln_push_target_eq = NULL;
+               the_lnet.ln_push_target_handler = NULL;
        }
 
        return rc;
@@ -2042,8 +2057,8 @@ static void lnet_push_target_fini(void)
        the_lnet.ln_push_target_nnis = 0;
 
        LNetClearLazyPortal(LNET_RESERVED_PORTAL);
-       LNetEQFree(the_lnet.ln_push_target_eq);
-       the_lnet.ln_push_target_eq = NULL;
+       lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
+       the_lnet.ln_push_target_handler = NULL;
 }
 
 static int
@@ -2104,7 +2119,13 @@ lnet_clear_zombies_nis_locked(struct lnet_net *net)
                }
 
                if (!list_empty(&ni->ni_netlist)) {
+                       /* Unlock mutex while waiting to allow other
+                        * threads to read the LNet state and fall through
+                        * to avoid deadlock
+                        */
                        lnet_net_unlock(LNET_LOCK_EX);
+                       mutex_unlock(&the_lnet.ln_api_mutex);
+
                        ++i;
                        if ((i & (-i)) == i) {
                                CDEBUG(D_WARNING,
@@ -2112,6 +2133,8 @@ lnet_clear_zombies_nis_locked(struct lnet_net *net)
                                       libcfs_nid2str(ni->ni_nid));
                        }
                        schedule_timeout_uninterruptible(cfs_time_seconds(1));
+
+                       mutex_lock(&the_lnet.ln_api_mutex);
                        lnet_net_lock(LNET_LOCK_EX);
                        continue;
                }
@@ -2661,6 +2684,9 @@ LNetNIInit(lnet_pid_t requested_pid)
                goto err_empty_list;
        }
 
+       if (use_tcp_bonding)
+               CWARN("'use_tcp_bonding' option has been deprecated. See LU-13641\n");
+
        /* If LNet is being initialized via DLC it is possible
         * that the user requests not to load module parameters (ones which
         * are supported by DLC) on initialization.  Therefore, make sure not
@@ -2703,12 +2729,7 @@ LNetNIInit(lnet_pid_t requested_pid)
 
        lnet_ping_target_update(pbuf, ping_mdh);
 
-       the_lnet.ln_mt_eq = LNetEQAlloc(lnet_mt_event_handler);
-       if (IS_ERR(the_lnet.ln_mt_eq)) {
-               rc = PTR_ERR(the_lnet.ln_mt_eq);
-               CERROR("Can't allocate monitor thread EQ: %d\n", rc);
-               goto err_stop_ping;
-       }
+       the_lnet.ln_mt_handler = lnet_mt_event_handler;
 
        rc = lnet_push_target_init();
        if (rc != 0)
@@ -2837,7 +2858,7 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
        }
 
        cfg_ni->lic_nid = ni->ni_nid;
-       if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
+       if (ni->ni_nid == LNET_NID_LO_0)
                cfg_ni->lic_status = LNET_NI_STATUS_UP;
        else
                cfg_ni->lic_status = ni->ni_status->ns_status;
@@ -2929,7 +2950,7 @@ lnet_fill_ni_info_legacy(struct lnet_ni *ni,
        config->cfg_config_u.cfg_net.net_peer_rtr_credits =
                ni->ni_net->net_tunables.lct_peer_rtr_credits;
 
-       if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
+       if (ni->ni_nid == LNET_NID_LO_0)
                net_config->ni_status = LNET_NI_STATUS_UP;
        else
                net_config->ni_status = ni->ni_status->ns_status;
@@ -3123,7 +3144,6 @@ static int lnet_add_net_common(struct lnet_net *net,
        int                     rc;
        struct lnet_remotenet *rnet;
        int                     net_ni_count;
-       int                     num_acceptor_nets;
 
        lnet_net_lock(LNET_LOCK_EX);
        rnet = lnet_find_rnet_locked(net->net_id);
@@ -3164,14 +3184,6 @@ static int lnet_add_net_common(struct lnet_net *net,
        else
                memset(&net->net_tunables, -1, sizeof(net->net_tunables));
 
-       /*
-        * before starting this network get a count of the current TCP
-        * networks which require the acceptor thread running. If that
-        * count is == 0 before we start up this network, then we'd want to
-        * start up the acceptor thread after starting up this network
-        */
-       num_acceptor_nets = lnet_count_acceptor_nets();
-
        net_id = net->net_id;
 
        rc = lnet_startup_lndnet(net,
@@ -3189,7 +3201,7 @@ static int lnet_add_net_common(struct lnet_net *net,
         * Start the acceptor thread if this is the first network
         * being added that requires the thread.
         */
-       if (net->net_lnd->lnd_accept && num_acceptor_nets == 0) {
+       if (net->net_lnd->lnd_accept) {
                rc = lnet_acceptor_start();
                if (rc < 0) {
                        /* shutdown the net that we just started */
@@ -3213,11 +3225,24 @@ failed:
        return rc;
 }
 
+/* Replace zero-valued common tunables with their defaults.
+ *
+ * Only lct_peer_timeout, lct_peer_tx_credits and lct_max_tx_credits are
+ * examined; any of them that is zero is set to DEFAULT_PEER_TIMEOUT,
+ * DEFAULT_PEER_CREDITS or DEFAULT_CREDITS respectively.  A NULL tun is
+ * ignored.
+ */
+static void
+lnet_set_tune_defaults(struct lnet_ioctl_config_lnd_tunables *tun)
+{
+       if (!tun)
+               return;
+
+       if (tun->lt_cmn.lct_peer_timeout == 0)
+               tun->lt_cmn.lct_peer_timeout = DEFAULT_PEER_TIMEOUT;
+
+       if (tun->lt_cmn.lct_peer_tx_credits == 0)
+               tun->lt_cmn.lct_peer_tx_credits = DEFAULT_PEER_CREDITS;
+
+       if (tun->lt_cmn.lct_max_tx_credits == 0)
+               tun->lt_cmn.lct_max_tx_credits = DEFAULT_CREDITS;
+}
+
 static int lnet_handle_legacy_ip2nets(char *ip2nets,
                                      struct lnet_ioctl_config_lnd_tunables *tun)
 {
        struct lnet_net *net;
-       char *nets;
+       const char *nets;
        int rc;
        LIST_HEAD(net_head);
 
@@ -3229,6 +3254,8 @@ static int lnet_handle_legacy_ip2nets(char *ip2nets,
        if (rc < 0)
                return rc;
 
+       lnet_set_tune_defaults(tun);
+
        mutex_lock(&the_lnet.ln_api_mutex);
        while (!list_empty(&net_head)) {
                net = list_entry(net_head.next, struct lnet_net, net_list);
@@ -3290,6 +3317,8 @@ int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf)
        if (!ni)
                return -ENOMEM;
 
+       lnet_set_tune_defaults(tun);
+
        mutex_lock(&the_lnet.ln_api_mutex);
 
        rc = lnet_add_net_common(net, tun);
@@ -3342,8 +3371,7 @@ int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf)
 
                lnet_shutdown_lndnet(net);
 
-               if (lnet_count_acceptor_nets() == 0)
-                       lnet_acceptor_stop();
+               lnet_acceptor_stop();
 
                lnet_ping_target_update(pbuf, ping_mdh);
 
@@ -3370,8 +3398,7 @@ int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf)
 
        lnet_shutdown_lndni(ni);
 
-       if (lnet_count_acceptor_nets() == 0)
-               lnet_acceptor_stop();
+       lnet_acceptor_stop();
 
        lnet_ping_target_update(pbuf, ping_mdh);
 
@@ -3402,7 +3429,7 @@ lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
        LIST_HEAD(net_head);
        int rc;
        struct lnet_ioctl_config_lnd_tunables tun;
-       char *nets = conf->cfg_config_u.cfg_net.net_intf;
+       const char *nets = conf->cfg_config_u.cfg_net.net_intf;
 
        /* Create a net/ni structures for the network string */
        rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
@@ -3424,13 +3451,16 @@ lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
        memset(&tun, 0, sizeof(tun));
 
        tun.lt_cmn.lct_peer_timeout =
-         conf->cfg_config_u.cfg_net.net_peer_timeout;
+         (!conf->cfg_config_u.cfg_net.net_peer_timeout) ? DEFAULT_PEER_TIMEOUT :
+               conf->cfg_config_u.cfg_net.net_peer_timeout;
        tun.lt_cmn.lct_peer_tx_credits =
-         conf->cfg_config_u.cfg_net.net_peer_tx_credits;
+         (!conf->cfg_config_u.cfg_net.net_peer_tx_credits) ? DEFAULT_PEER_CREDITS :
+               conf->cfg_config_u.cfg_net.net_peer_tx_credits;
        tun.lt_cmn.lct_peer_rtr_credits =
          conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
        tun.lt_cmn.lct_max_tx_credits =
-         conf->cfg_config_u.cfg_net.net_max_tx_credits;
+         (!conf->cfg_config_u.cfg_net.net_max_tx_credits) ? DEFAULT_CREDITS :
+               conf->cfg_config_u.cfg_net.net_max_tx_credits;
 
        rc = lnet_add_net_common(net, &tun);
 
@@ -3481,8 +3511,7 @@ lnet_dyn_del_net(__u32 net_id)
 
        lnet_shutdown_lndnet(net);
 
-       if (lnet_count_acceptor_nets() == 0)
-               lnet_acceptor_stop();
+       lnet_acceptor_stop();
 
        lnet_ping_target_update(pbuf, ping_mdh);
 
@@ -3738,9 +3767,9 @@ LNetCtl(unsigned int cmd, void *arg)
                        return -EINVAL;
 
                mutex_lock(&the_lnet.ln_api_mutex);
-               lnet_counters_get(&lnet_stats->st_cntrs);
+               rc = lnet_counters_get(&lnet_stats->st_cntrs);
                mutex_unlock(&the_lnet.ln_api_mutex);
-               return 0;
+               return rc;
        }
 
        case IOC_LIBCFS_CONFIG_RTR:
@@ -4144,7 +4173,7 @@ struct ping_data {
 static void
 lnet_ping_event_handler(struct lnet_event *event)
 {
-       struct ping_data *pd = event->md.user_ptr;
+       struct ping_data *pd = event->md_user_ptr;
 
        CDEBUG(D_NET, "ping event (%d %d)%s\n",
               event->type, event->status,
@@ -4164,7 +4193,6 @@ lnet_ping_event_handler(struct lnet_event *event)
 static int lnet_ping(struct lnet_process_id id, signed long timeout,
                     struct lnet_process_id __user *ids, int n_ids)
 {
-       struct lnet_eq *eq;
        struct lnet_md md = { NULL };
        struct ping_data pd = { 0 };
        struct lnet_ping_buffer *pbuf;
@@ -4192,13 +4220,6 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
        if (!pbuf)
                return -ENOMEM;
 
-       eq = LNetEQAlloc(lnet_ping_event_handler);
-       if (IS_ERR(eq)) {
-               rc = PTR_ERR(eq);
-               CERROR("Can't allocate EQ: %d\n", rc);
-               goto fail_ping_buffer_decref;
-       }
-
        /* initialize md content */
        md.start     = &pbuf->pb_info;
        md.length    = LNET_PING_INFO_SIZE(n_ids);
@@ -4206,14 +4227,14 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
        md.max_size  = 0;
        md.options   = LNET_MD_TRUNCATE;
        md.user_ptr  = &pd;
-       md.eq_handle = eq;
+       md.handler   = lnet_ping_event_handler;
 
        init_completion(&pd.completion);
 
-       rc = LNetMDBind(md, LNET_UNLINK, &pd.mdh);
+       rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
        if (rc != 0) {
                CERROR("Can't bind MD: %d\n", rc);
-               goto fail_free_eq;
+               goto fail_ping_buffer_decref;
        }
 
        rc = LNetGet(LNET_NID_ANY, pd.mdh, id,
@@ -4235,7 +4256,7 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
        }
        if (!pd.replied) {
                rc = -EIO;
-               goto fail_free_eq;
+               goto fail_ping_buffer_decref;
        }
 
        nob = pd.rc;
@@ -4246,7 +4267,7 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
        if (nob < 8) {
                CERROR("%s: ping info too short %d\n",
                       libcfs_id2str(id), nob);
-               goto fail_free_eq;
+               goto fail_ping_buffer_decref;
        }
 
        if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
@@ -4254,20 +4275,20 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
        } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
                CERROR("%s: Unexpected magic %08x\n",
                       libcfs_id2str(id), pbuf->pb_info.pi_magic);
-               goto fail_free_eq;
+               goto fail_ping_buffer_decref;
        }
 
        if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
                CERROR("%s: ping w/o NI status: 0x%x\n",
                       libcfs_id2str(id), pbuf->pb_info.pi_features);
-               goto fail_free_eq;
+               goto fail_ping_buffer_decref;
        }
 
        if (nob < LNET_PING_INFO_SIZE(0)) {
                CERROR("%s: Short reply %d(%d min)\n",
                       libcfs_id2str(id),
                       nob, (int)LNET_PING_INFO_SIZE(0));
-               goto fail_free_eq;
+               goto fail_ping_buffer_decref;
        }
 
        if (pbuf->pb_info.pi_nnis < n_ids)
@@ -4277,7 +4298,7 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
                CERROR("%s: Short reply %d(%d expected)\n",
                       libcfs_id2str(id),
                       nob, (int)LNET_PING_INFO_SIZE(n_ids));
-               goto fail_free_eq;
+               goto fail_ping_buffer_decref;
        }
 
        rc = -EFAULT;           /* if I segv in copy_to_user()... */
@@ -4287,16 +4308,10 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
                tmpid.pid = pbuf->pb_info.pi_pid;
                tmpid.nid = pbuf->pb_info.pi_ni[i].ns_nid;
                if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
-                       goto fail_free_eq;
+                       goto fail_ping_buffer_decref;
        }
        rc = pbuf->pb_info.pi_nnis;
 
- fail_free_eq:
-       rc2 = LNetEQFree(eq);
-       if (rc2 != 0)
-               CERROR("rc2 %d\n", rc2);
-       LASSERT(rc2 == 0);
-
  fail_ping_buffer_decref:
        lnet_ping_buffer_decref(pbuf);
        return rc;
@@ -4313,7 +4328,6 @@ lnet_discover(struct lnet_process_id id, __u32 force,
        int cpt;
        int i;
        int rc;
-       int max_intf = lnet_interfaces_max;
 
        if (n_ids <= 0 ||
            id.nid == LNET_NID_ANY)
@@ -4323,11 +4337,11 @@ lnet_discover(struct lnet_process_id id, __u32 force,
                id.pid = LNET_PID_LUSTRE;
 
        /*
-        * if the user buffer has more space than the max_intf
-        * then only fill it up to max_intf
+        * If the user buffer has more space than the lnet_interfaces_max,
+        * then only fill it up to lnet_interfaces_max.
         */
-       if (n_ids > max_intf)
-               n_ids = max_intf;
+       if (n_ids > lnet_interfaces_max)
+               n_ids = lnet_interfaces_max;
 
        CFS_ALLOC_PTR_ARRAY(buf, n_ids);
        if (!buf)
@@ -4355,11 +4369,6 @@ lnet_discover(struct lnet_process_id id, __u32 force,
        if (rc)
                goto out_decref;
 
-       /* Peer may have changed. */
-       lp = lpni->lpni_peer_net->lpn_peer;
-       if (lp->lp_nnis < n_ids)
-               n_ids = lp->lp_nnis;
-
        i = 0;
        p = NULL;
        while ((p = lnet_get_next_peer_ni_locked(lp, NULL, p)) != NULL) {
@@ -4368,20 +4377,16 @@ lnet_discover(struct lnet_process_id id, __u32 force,
                if (++i >= n_ids)
                        break;
        }
+       rc = i;
 
-       lnet_net_unlock(cpt);
-
-       rc = -EFAULT;
-       if (copy_to_user(ids, buf, n_ids * sizeof(*buf)))
-               goto out_relock;
-       rc = n_ids;
-out_relock:
-       lnet_net_lock(cpt);
 out_decref:
        lnet_peer_ni_decref_locked(lpni);
 out:
        lnet_net_unlock(cpt);
 
+       if (rc >= 0)
+               if (copy_to_user(ids, buf, rc * sizeof(*buf)))
+                       rc = -EFAULT;
        CFS_FREE_PTR_ARRAY(buf, n_ids);
 
        return rc;