Whamcloud - gitweb
LU-14516 mgc: configurable wait-to-reprocess time 20/42020/19
authorAlex Zhuravlev <bzzz@whamcloud.com>
Fri, 12 Mar 2021 09:00:37 +0000 (12:00 +0300)
committerOleg Drokin <green@whamcloud.com>
Wed, 30 Jun 2021 15:26:26 +0000 (15:26 +0000)
Make the wait time configurable so that it can be set shorter, for
testing purposes at least. To change the minimal wait time, the MGC
module option 'mgc_requeue_timeout_min' should be used (in seconds).
Additionally, a random value up to mgc_requeue_timeout_min is added to
avoid a flood of config re-read requests from clients. If
mgc_requeue_timeout_min is set to 0, then the random part will be up
to 1 second.

ost-pools: before: 5840s, after: 3474s
sanity-flr: before: 1575s, after: 1381s
sanity-quota: before: 10679s, after: 9703s

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: Iff7dad4ba14d687b7e891a1c346397e4c370800d
Reviewed-on: https://review.whamcloud.com/42020
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Aurelien Degremont <degremoa@amazon.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mgc/mgc_internal.h
lustre/mgc/mgc_request.c
lustre/tests/test-framework.sh
lustre/utils/obd.c

index 3657c86..2289972 100644 (file)
@@ -43,6 +43,14 @@ int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data);
 
 int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld);
 
+/* Default minimum number of seconds the MGC waits before requeueing
+ * configs and recovering its lock with the MGS. The actual wait is
+ * randomized so that clients do not flood the MGS.
+ */
+#define MGC_TIMEOUT_MIN_SECONDS                5
+
+extern unsigned int mgc_requeue_timeout_min;
+
 static inline bool cld_is_sptlrpc(struct config_llog_data *cld)
 {
        return cld->cld_type == MGS_CFG_T_SPTLRPC;
index 72c52de..65ecd67 100644 (file)
@@ -619,13 +619,6 @@ static void do_requeue(struct config_llog_data *cld)
         EXIT;
 }
 
-/* this timeout represents how many seconds MGC should wait before
- * requeue config and recover lock to the MGS. We need to randomize this
- * in order to not flood the MGS.
- */
-#define MGC_TIMEOUT_MIN_SECONDS   5
-#define MGC_TIMEOUT_RAND_CENTISEC 500
-
 static int mgc_requeue_thread(void *data)
 {
        int rc = 0;
@@ -639,7 +632,6 @@ static int mgc_requeue_thread(void *data)
        rq_state |= RQ_RUNNING;
        while (!(rq_state & RQ_STOP)) {
                struct config_llog_data *cld, *cld_prev;
-               int rand = prandom_u32_max(MGC_TIMEOUT_RAND_CENTISEC);
                int to;
 
                /* Any new or requeued lostlocks will change the state */
@@ -655,11 +647,11 @@ static int mgc_requeue_thread(void *data)
                 * caused the lock revocation to finish its setup, plus some
                 * random so everyone doesn't try to reconnect at once.
                 */
-               to = cfs_time_seconds(MGC_TIMEOUT_MIN_SECONDS * 100 + rand);
-               /* rand is centi-seconds */
+               to = mgc_requeue_timeout_min == 0 ? 1 : mgc_requeue_timeout_min;
+               to = cfs_time_seconds(mgc_requeue_timeout_min) +
+                       prandom_u32_max(cfs_time_seconds(to));
                wait_event_idle_timeout(rq_waitq,
-                                       rq_state & (RQ_STOP | RQ_PRECLEANUP),
-                                       to/100);
+                                       rq_state & (RQ_STOP | RQ_PRECLEANUP), to);
 
                /*
                 * iterate & processing through the list. for each cld, process
@@ -2288,6 +2280,40 @@ static const struct obd_ops mgc_obd_ops = {
         .o_process_config = mgc_process_config,
 };
 
+static int mgc_param_requeue_timeout_min_set(const char *val,
+                                    cfs_kernel_param_arg_t *kp)
+{
+       int rc;
+       unsigned int num;
+
+       rc = kstrtouint(val, 0, &num);
+       if (rc < 0)
+               return rc;
+       if (num > 120)
+               return -EINVAL;
+
+       mgc_requeue_timeout_min = num;
+
+       return 0;
+}
+
+static const struct kernel_param_ops param_ops_requeue_timeout_min = {
+       .set = mgc_param_requeue_timeout_min_set,
+       .get = param_get_uint,
+};
+
+#define param_check_requeue_timeout_min(name, p) \
+               __param_check(name, p, unsigned int)
+
+unsigned int mgc_requeue_timeout_min = MGC_TIMEOUT_MIN_SECONDS;
+#ifdef HAVE_KERNEL_PARAM_OPS
+module_param(mgc_requeue_timeout_min, requeue_timeout_min, 0644);
+#else
+module_param_call(mgc_requeue_timeout_min, mgc_param_requeue_timeout_min_set,
+                 param_get_uint, &param_ops_requeue_timeout_min, 0644);
+#endif
+MODULE_PARM_DESC(mgc_requeue_timeout_min, "Minimal requeue time to refresh logs");
+
 static int __init mgc_init(void)
 {
        return class_register_type(&mgc_obd_ops, NULL, false,
index f5c2aba..6566c04 100755 (executable)
@@ -5270,6 +5270,10 @@ init_param_vars () {
        TIMEOUT=$(do_facet $SINGLEMDS "lctl get_param -n timeout")
        log "Using TIMEOUT=$TIMEOUT"
 
+       # tune down to speed up testing on (usually) small setups
+       do_nodes $(comma_list $(nodes_list)) \
+               "echo 1 >/sys/module/mgc/parameters/mgc_requeue_timeout_min"
+
        osc_ensure_active $SINGLEMDS $TIMEOUT
        osc_ensure_active client $TIMEOUT
        $LCTL set_param osc.*.idle_timeout=debug
index 5d57927..9238659 100644 (file)
@@ -3714,7 +3714,7 @@ static int check_pool_cmd_result(enum lcfg_command_type cmd, char *fsname,
                        if (rc == -ENODEV)
                                return rc;
                        if (rc < 0)
-                               sleep(2);
+                               sleep(1);
                        cpt--;
                } while ((rc < 0) && (cpt > 0));
                if (rc >= 0) {
@@ -3733,7 +3733,7 @@ static int check_pool_cmd_result(enum lcfg_command_type cmd, char *fsname,
                        if (rc == -ENODEV)
                                return rc;
                        if (rc >= 0)
-                               sleep(2);
+                               sleep(1);
                        cpt--;
                } while ((rc >= 0) && (cpt > 0));
                if (rc < 0) {
@@ -3752,7 +3752,7 @@ static int check_pool_cmd_result(enum lcfg_command_type cmd, char *fsname,
                        if (rc == -ENODEV)
                                return rc;
                        if (rc != 1)
-                               sleep(2);
+                               sleep(1);
                        cpt--;
                } while ((rc != 1) && (cpt > 0));
                if (rc == 1) {
@@ -3770,7 +3770,7 @@ static int check_pool_cmd_result(enum lcfg_command_type cmd, char *fsname,
                        if (rc == -ENODEV)
                                return rc;
                        if (rc == 1)
-                               sleep(2);
+                               sleep(1);
                        cpt--;
                } while ((rc == 1) && (cpt > 0));
                if (rc != 1) {