*/
unsigned int lnet_health_sensitivity = 0;
static int sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp);
+#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_health_sensitivity = {
.set = sensitivity_set,
.get = param_get_int,
};
#define param_check_health_sensitivity(name, p) \
__param_check(name, p, int)
-#ifdef HAVE_KERNEL_PARAM_OPS
module_param(lnet_health_sensitivity, health_sensitivity, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_health_sensitivity, sensitivity_set, param_get_int,
MODULE_PARM_DESC(lnet_health_sensitivity,
"Value to decrement the health value by on error");
+/*
+ * lnet_recovery_interval determines how often we should perform recovery
+ * on unhealthy interfaces. The value is expressed in seconds and defaults
+ * to 1.
+ *
+ * Writes to the parameter are routed through recovery_interval_set() and
+ * reads through param_get_int. When HAVE_KERNEL_PARAM_OPS is defined the
+ * parameter is registered with module_param() using the custom
+ * "recovery_interval" parameter type declared below; otherwise it is
+ * registered with module_param_call(). Both variants use mode
+ * S_IRUGO|S_IWUSR.
+ *
+ * NOTE(review): the variable is an unsigned int while the getter and the
+ * __param_check() type are int; this matches the other parameters in this
+ * file but is worth confirming.
+ */
+unsigned int lnet_recovery_interval = 1;
+static int recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp);
+#ifdef HAVE_KERNEL_PARAM_OPS
+static struct kernel_param_ops param_ops_recovery_interval = {
+ .set = recovery_interval_set,
+ .get = param_get_int,
+};
+#define param_check_recovery_interval(name, p) \
+ __param_check(name, p, int)
+module_param(lnet_recovery_interval, recovery_interval, S_IRUGO|S_IWUSR);
+#else /* !HAVE_KERNEL_PARAM_OPS */
+module_param_call(lnet_recovery_interval, recovery_interval_set, param_get_int,
+ &lnet_recovery_interval, S_IRUGO|S_IWUSR);
+#endif /* HAVE_KERNEL_PARAM_OPS */
+MODULE_PARM_DESC(lnet_recovery_interval,
+ "Interval to recover unhealthy interfaces in seconds");
+
static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp);
MODULE_PARM_DESC(lnet_peer_discovery_disabled,
"Set to 1 to disable peer discovery on this node.");
-unsigned lnet_transaction_timeout = 5;
+unsigned lnet_transaction_timeout = 50;
static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
+#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_transaction_timeout = {
.set = transaction_to_set,
.get = param_get_int,
#define param_check_transaction_timeout(name, p) \
__param_check(name, p, int)
-#ifdef HAVE_KERNEL_PARAM_OPS
module_param(lnet_transaction_timeout, transaction_timeout, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
&lnet_transaction_timeout, S_IRUGO|S_IWUSR);
#endif
-MODULE_PARM_DESC(lnet_peer_discovery_disabled,
- "Set to 1 to disable peer discovery on this node.");
+MODULE_PARM_DESC(lnet_transaction_timeout,
+ "Maximum number of seconds to wait for a peer response.");
unsigned lnet_retry_count = 0;
static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
+#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_retry_count = {
.set = retry_count_set,
.get = param_get_int,
#define param_check_retry_count(name, p) \
__param_check(name, p, int)
-#ifdef HAVE_KERNEL_PARAM_OPS
module_param(lnet_retry_count, retry_count, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_retry_count, retry_count_set, param_get_int,
}
+/*
+ * Setter for the lnet_recovery_interval module parameter.
+ *
+ * val is the textual value written to the parameter; it is parsed with
+ * kstrtoul() using base 0. kp->arg is treated as a pointer to the
+ * unsigned int that receives the parsed value.
+ *
+ * Returns 0 when the value was stored, and also when LNet is not in
+ * LNET_STATE_RUNNING, in which case the value is not stored. Returns the
+ * kstrtoul() error for unparsable input, and -EINVAL when the value is
+ * below 1 or does not fit in an unsigned int.
+ */
static int
+recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
+{
+	unsigned *interval = (unsigned *)kp->arg;
+	unsigned long value;
+	int rc;
+
+	rc = kstrtoul(val, 0, &value);
+	if (rc) {
+		CERROR("Invalid module parameter value for 'lnet_recovery_interval'\n");
+		return rc;
+	}
+
+	if (value < 1) {
+		CERROR("lnet_recovery_interval must be at least 1 second\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * value is an unsigned long but the parameter is an unsigned int.
+	 * Where unsigned long is the wider type, an oversized value would be
+	 * silently truncated on store (possibly to 0, defeating the minimum
+	 * check above), so reject it instead.
+	 */
+	if (value != (unsigned long)(unsigned)value) {
+		CERROR("lnet_recovery_interval is too large\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * The purpose of locking the api_mutex here is to ensure that
+	 * the correct value ends up stored properly.
+	 */
+	mutex_lock(&the_lnet.ln_api_mutex);
+
+	/*
+	 * NOTE(review): when LNet is not running the new value is dropped
+	 * and success is still returned; confirm this is intended.
+	 */
+	if (the_lnet.ln_state == LNET_STATE_RUNNING)
+		*interval = (unsigned)value;
+
+	mutex_unlock(&the_lnet.ln_api_mutex);
+
+	return 0;
+}
+
+static int
discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
{
int rc;
list_del_init(&ni->ni_netlist);
/* the ni should be in deleting state. If it's not it's
* a bug */
- LASSERT(ni->ni_state & LNET_NI_STATE_DELETING);
+ LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
cfs_percpt_for_each(ref, j, ni->ni_refs) {
if (*ref == 0)
continue;
lnet_net_lock(LNET_LOCK_EX);
lnet_ni_lock(ni);
- ni->ni_state |= LNET_NI_STATE_DELETING;
- ni->ni_state &= ~LNET_NI_STATE_ACTIVE;
+ ni->ni_state = LNET_NI_STATE_DELETING;
lnet_ni_unlock(ni);
lnet_ni_unlink_locked(ni);
lnet_incr_dlc_seq();
}
lnet_ni_lock(ni);
- ni->ni_state |= LNET_NI_STATE_ACTIVE;
- ni->ni_state &= ~LNET_NI_STATE_INIT;
+ ni->ni_state = LNET_NI_STATE_ACTIVE;
lnet_ni_unlock(ni);
/* We keep a reference on the loopback net through the loopback NI */