Whamcloud - gitweb
LU-13510 lnet: Correct the default LND timeout
[fs/lustre-release.git] / lnet / lnet / api-ni.c
index 0d5e45b..62411c6 100644 (file)
@@ -223,7 +223,15 @@ MODULE_PARM_DESC(lnet_retry_count,
                 "Maximum number of times to retry transmitting a message");
 
 
-unsigned lnet_lnd_timeout = LNET_LND_DEFAULT_TIMEOUT;
+#define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT - 1) / \
+                                 (LNET_RETRY_COUNT_HEALTH_DEFAULT + 1))
+unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
+/*
+ * Recompute lnet_lnd_timeout from the current lnet_transaction_timeout and
+ * lnet_retry_count as (transaction timeout - 1) / (retry count + 1).
+ *
+ * The integer division yields 0 whenever the transaction timeout is no
+ * larger than retry count + 1, so the result is clamped to a minimum of 1
+ * rather than publishing a zero LND timeout.
+ *
+ * NOTE(review): assumes lnet_transaction_timeout is never 0 here, since
+ * (0 - 1) would wrap for an unsigned value -- confirm against the setters'
+ * validation.
+ */
+static void lnet_set_lnd_timeout(void)
+{
+       unsigned int timeout = (lnet_transaction_timeout - 1) /
+                              (lnet_retry_count + 1);
+
+       /* single store of the final value; 0 is never written */
+       lnet_lnd_timeout = timeout ? timeout : 1;
+}
+
 unsigned int lnet_current_net_count;
 
 /*
@@ -274,6 +282,7 @@ sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
        if (*sensitivity == 0 && value != 0) {
                lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT;
                lnet_retry_count = LNET_RETRY_COUNT_HEALTH_DEFAULT;
+               lnet_set_lnd_timeout();
        /*
         * if we're turning off health then use the no health timeout
         * default.
@@ -282,6 +291,7 @@ sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
                lnet_transaction_timeout =
                        LNET_TRANSACTION_TIMEOUT_NO_HEALTH_DEFAULT;
                lnet_retry_count = 0;
+               lnet_set_lnd_timeout();
        }
 
        *sensitivity = value;
@@ -326,7 +336,7 @@ static int
 discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
 {
        int rc;
-       unsigned *discovery = (unsigned *)kp->arg;
+       unsigned *discovery_off = (unsigned *)kp->arg;
        unsigned long value;
        struct lnet_ping_buffer *pbuf;
 
@@ -344,7 +354,7 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
         */
        mutex_lock(&the_lnet.ln_api_mutex);
 
-       if (value == *discovery) {
+       if (value == *discovery_off) {
                mutex_unlock(&the_lnet.ln_api_mutex);
                return 0;
        }
@@ -357,7 +367,7 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
         * updating the peers
         */
        if (the_lnet.ln_state != LNET_STATE_RUNNING) {
-               *discovery = value;
+               *discovery_off = value;
                mutex_unlock(&the_lnet.ln_api_mutex);
                return 0;
        }
@@ -371,23 +381,10 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
                pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
        lnet_net_unlock(LNET_LOCK_EX);
 
-       /*
-        * Always update the peers. This will result in a push to the
-        * peers with the updated capabilities feature mask. The peer can
-        * then take appropriate action to update its representation of
-        * the node.
-        *
-        * If discovery is already off, turn it on first before pushing
-        * the update. The discovery flag must be on before pushing.
-        * otherwise if the flag is on and we're turning it off then push
-        * first before turning the flag off. In the former case the flag
-        * is being set twice, but I find it's better to do that rather
-        * than have duplicate code in an if/else statement.
-        */
-       if (*discovery > 0 && value == 0)
-               *discovery = value;
-       lnet_push_update_to_peers(1);
-       *discovery = value;
+       /* only send a push when we're turning off discovery */
+       if (*discovery_off <= 0 && value > 0)
+               lnet_push_update_to_peers(1);
+       *discovery_off = value;
 
        mutex_unlock(&the_lnet.ln_api_mutex);
 
@@ -459,10 +456,10 @@ transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
        }
 
        *transaction_to = value;
-       if (lnet_retry_count == 0)
-               lnet_lnd_timeout = value;
-       else
-               lnet_lnd_timeout = value / lnet_retry_count;
+       /* Update the lnet_lnd_timeout now that we've modified the
+        * transaction timeout
+        */
+       lnet_set_lnd_timeout();
 
        mutex_unlock(&the_lnet.ln_api_mutex);
 
@@ -504,10 +501,10 @@ retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
 
        *retry_count = value;
 
-       if (value == 0)
-               lnet_lnd_timeout = lnet_transaction_timeout;
-       else
-               lnet_lnd_timeout = lnet_transaction_timeout / value;
+       /* Update the lnet_lnd_timeout now that we've modified the
+        * retry count
+        */
+       lnet_set_lnd_timeout();
 
        mutex_unlock(&the_lnet.ln_api_mutex);