Whamcloud - gitweb
LU-13569 lnet: Deprecate lnet_recovery_interval
author Chris Horn <chris.horn@hpe.com>
Fri, 21 Aug 2020 19:42:48 +0000 (14:42 -0500)
committer Andreas Dilger <adilger@whamcloud.com>
Sat, 23 Mar 2024 20:31:09 +0000 (20:31 +0000)
We no longer use a static recovery interval, so remove the remaining
uses of lnet_recovery_interval and add a warning that it has been
deprecated.

Lustre-change: https://review.whamcloud.com/39722
Lustre-commit: 79ab0535622782c82636cee47918dc4b19983144

Test-Parameters: trivial
HPE-bug-id: LUS-9109
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: Iedc79803ef5b7ba041a531961eb77acd338abfb5
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54404
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lnet/lnet/api-ni.c
lnet/lnet/lib-move.c

index 39aa124..66b091c 100644 (file)
@@ -122,7 +122,7 @@ module_param_call(lnet_recovery_interval, recovery_interval_set, param_get_int,
                  &lnet_recovery_interval, S_IRUGO|S_IWUSR);
 #endif
 MODULE_PARM_DESC(lnet_recovery_interval,
-               "Interval to recover unhealthy interfaces in seconds");
+               "DEPRECATED - Interval to recover unhealthy interfaces in seconds");
 
 unsigned int lnet_recovery_limit;
 module_param(lnet_recovery_limit, uint, 0644);
@@ -317,30 +317,7 @@ sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
 static int
 recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
 {
-       int rc;
-       unsigned *interval = (unsigned *)kp->arg;
-       unsigned long value;
-
-       rc = kstrtoul(val, 0, &value);
-       if (rc) {
-               CERROR("Invalid module parameter value for 'lnet_recovery_interval'\n");
-               return rc;
-       }
-
-       if (value < 1) {
-               CERROR("lnet_recovery_interval must be at least 1 second\n");
-               return -EINVAL;
-       }
-
-       /*
-        * The purpose of locking the api_mutex here is to ensure that
-        * the correct value ends up stored properly.
-        */
-       mutex_lock(&the_lnet.ln_api_mutex);
-
-       *interval = value;
-
-       mutex_unlock(&the_lnet.ln_api_mutex);
+       CWARN("'lnet_recovery_interval' has been deprecated\n");
 
        return 0;
 }
index eefcc84..20cf75e 100644 (file)
@@ -3753,9 +3753,7 @@ lnet_recover_peer_nis(void)
 static int
 lnet_monitor_thread(void *arg)
 {
-       time64_t recovery_timeout = 0;
        time64_t rsp_timeout = 0;
-       int interval;
        time64_t now;
 
        wait_for_completion(&the_lnet.ln_started);
@@ -3784,11 +3782,8 @@ lnet_monitor_thread(void *arg)
                        rsp_timeout = now + (lnet_transaction_timeout / 2);
                }
 
-               if (now >= recovery_timeout) {
-                       lnet_recover_local_nis();
-                       lnet_recover_peer_nis();
-                       recovery_timeout = now + lnet_recovery_interval;
-               }
+               lnet_recover_local_nis();
+               lnet_recover_peer_nis();
 
                lnet_queue_ping_buffer_update();
 
@@ -3800,18 +3795,10 @@ lnet_monitor_thread(void *arg)
                 * if we wake up every 1 second? Although, we've seen
                 * cases where we get a complaint that an idle thread
                 * is waking up unnecessarily.
-                *
-                * Take into account the current net_count when you wake
-                * up for alive router checking, since we need to check
-                * possibly as many networks as we have configured.
                 */
-               interval = min(lnet_recovery_interval,
-                              min((unsigned int) alive_router_check_interval /
-                                       lnet_current_net_count,
-                                  lnet_transaction_timeout / 2));
                wait_for_completion_interruptible_timeout(
                        &the_lnet.ln_mt_wait_complete,
-                       cfs_time_seconds(interval));
+                       cfs_time_seconds(1));
                /* Must re-init the completion before testing anything,
                 * including ln_mt_state.
                 */