Whamcloud - gitweb
LU-16169 e2fsck: improve parallel thread balance 06/48806/8
authorAndreas Dilger <adilger@whamcloud.com>
Sat, 8 Oct 2022 06:02:49 +0000 (00:02 -0600)
committerAndreas Dilger <adilger@whamcloud.com>
Tue, 29 Aug 2023 18:24:14 +0000 (18:24 +0000)
Improve the balance of work allocated to each thread by distributing
a more equal number of inodes to each thread, rather than an equal
number of groups.

In some cases in real usage, the number of inodes allocated to
threads with equal numbers of groups can vary by 10x or more, which
leads to pass1 threads having a runtime time roughly propritional
to the number of inodes allocated to them.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Iea964cca33d19170e9b6d88aa725dc878cae6ce2
Reviewed-on: https://review.whamcloud.com/c/tools/e2fsprogs/+/48806
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Li Dongyang <dongyangli@ddn.com>
e2fsck/e2fsck.h
e2fsck/pass1.c
tests/f_multithread/expect.1
tests/f_multithread_logfile/expect.1
tests/f_multithread_no/expect.1

index eb22c74..aaa2f29 100644 (file)
@@ -303,6 +303,8 @@ struct e2fsck_thread {
        dgrp_t          et_group_end;
        /* The next group number to check */
        dgrp_t          et_group_next;
+       /* total number of inodes assigned to this thread */
+       ext2_ino_t      et_inode_count;
        /* Scanned inode number */
        ext2_ino_t      et_inode_number;
        int             et_log_length;
index 037b99a..449a435 100644 (file)
@@ -2995,15 +2995,20 @@ static void e2fsck_pass1_merge_invalid_bitmaps(e2fsck_t global_ctx,
        global_ctx->invalid_bitmaps += thread_ctx->invalid_bitmaps;
 }
 
-static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thread_ctx,
-                                            int thread_index, int num_threads,
-                                            dgrp_t average_group)
+static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx,
+                                            e2fsck_t *thread_ctx,
+                                            int thread_index,
+                                            dgrp_t average_group,
+                                            ext2_ino_t average_inodes,
+                                            dgrp_t *start_group,
+                                            int *inode_count, int last_thread)
 {
        errcode_t               retval;
        e2fsck_t                thread_context;
        ext2_filsys             thread_fs;
        ext2_filsys             global_fs = global_ctx->fs;
        struct e2fsck_thread    *tinfo;
+       dgrp_t                  grp;
 
        assert(global_ctx->inode_used_map == NULL);
        assert(global_ctx->inode_dir_map == NULL);
@@ -3054,18 +3059,50 @@ static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thre
        set_up_logging(thread_context);
 
        tinfo = &thread_context->thread_info;
-       tinfo->et_group_start = average_group * thread_index;
-       if (thread_index == global_fs->fs_num_threads - 1)
-               tinfo->et_group_end = thread_fs->group_desc_count;
-       else
-               tinfo->et_group_end = average_group * (thread_index + 1);
+
+       tinfo->et_group_start = *start_group;
+
+       /* Try to allocate an equal number of in-use inodes to each thread,
+        * rather than an equal number of block groups.   Accumulate inodes
+        * to each thread until approximately the average number is reached.
+        *
+        * If the thread has least one group, and the inode count is closer
+        * to the average *before* adding the next group, then stop before
+        * adding it.  Accumumlate the difference between ideal distribution
+        * and actual number assigned to each thread to avoid to drifting too
+        * far from the average, similar to Bresenham line drawing algorithm.
+        *
+        * Limit groups per thread to 5x the average, in case the group
+        * descriptor summaries are bad (e.g. read from backups with no info).
+        *
+        * Assign all remaining groups to last thread if distribution was bad.
+        */
+       for (grp = *start_group; grp < global_fs->group_desc_count; grp++) {
+               ext2_ino_t grp_used = global_fs->super->s_inodes_per_group -
+                       ext2fs_bg_free_inodes_count(global_fs, grp);
+               ext2_ino_t next_count = *inode_count + grp_used;
+
+               if (((next_count >= average_inodes && grp > *start_group &&
+                     (long)next_count - average_inodes >
+                     (long)average_inodes - *inode_count) ||
+                    grp - *start_group > average_group * 5) && !last_thread) {
+                       *inode_count -= average_inodes;
+                       break;
+               }
+               tinfo->et_inode_count += grp_used;
+               *inode_count = next_count;
+       }
+       tinfo->et_group_end = grp;
+       *start_group = grp;
        tinfo->et_group_next = tinfo->et_group_start;
        tinfo->et_inode_number = 0;
        tinfo->et_log_buf[0] = '\0';
        tinfo->et_log_length = 0;
        if (thread_context->options & E2F_OPT_MULTITHREAD)
-               log_out(thread_context, _("Scan group range [%d, %d)\n"),
-                       tinfo->et_group_start, tinfo->et_group_end);
+               log_out(thread_context,
+                       _("Scan group range [%d, %d), inode_count = %u/%u\n"),
+                       tinfo->et_group_start, tinfo->et_group_end,
+                       tinfo->et_inode_count, average_inodes);
        thread_context->fs = thread_fs;
        retval = quota_init_context(&thread_context->qctx, thread_fs, 0);
        if (retval) {
@@ -3602,9 +3639,10 @@ static void *e2fsck_pass1_thread(void *arg)
 out:
        if (thread_ctx->options & E2F_OPT_MULTITHREAD)
                log_out(thread_ctx,
-                       _("Scanned group range [%u, %u), inodes %u\n"),
+                       _("Scanned group range [%u, %u), inodes %u/%u\n"),
                        thread_ctx->thread_info.et_group_start,
                        thread_ctx->thread_info.et_group_end,
+                       thread_ctx->thread_info.et_inode_count,
                        thread_ctx->thread_info.et_inode_number);
 
 #ifdef DEBUG_THREADS
@@ -3619,12 +3657,12 @@ out:
 
 static dgrp_t ext2fs_get_avg_group(ext2_filsys fs)
 {
+       dgrp_t average_group = fs->group_desc_count;
 #ifdef HAVE_PTHREAD
-       dgrp_t average_group;
        unsigned flexbg_size;
 
        if (fs->fs_num_threads <= 1)
-               return fs->group_desc_count;
+               goto out;
 
        average_group = fs->group_desc_count / fs->fs_num_threads;
        if (average_group <= 1)
@@ -3639,11 +3677,26 @@ static dgrp_t ext2fs_get_avg_group(ext2_filsys fs)
                        average_group = times * flexbg_size;
                }
        }
-
+out:
+#endif
        return average_group;
-#else
-       return fs->group_desc_count;
+}
+
+static dgrp_t ext2fs_get_avg_inodes(ext2_filsys fs)
+{
+       ext2_ino_t average_inodes = fs->super->s_inodes_count;
+#ifdef HAVE_PTHREAD
+
+       if (fs->fs_num_threads <= 1)
+               goto out;
+
+       average_inodes = fs->super->s_inodes_count / fs->fs_num_threads;
+       if (average_inodes <= fs->super->s_inodes_per_group)
+               average_inodes = fs->super->s_inodes_per_group;
+
+out:
 #endif
+       return average_inodes;
 }
 
 static int e2fsck_pass1_threads_start(e2fsck_t global_ctx)
@@ -3653,9 +3706,12 @@ static int e2fsck_pass1_threads_start(e2fsck_t global_ctx)
        errcode_t                        retval;
        errcode_t                        ret;
        struct e2fsck_thread_info       *tmp_pinfo;
-       int                              i;
+       int                              thread;
        e2fsck_t                         thread_ctx;
+       dgrp_t                           start_group;
        dgrp_t                           average_group;
+       ext2_ino_t                       average_inodes;
+       int                              inode_count;
        int num_threads = global_ctx->pfs_num_threads;
 #ifdef DEBUG_THREADS
        struct e2fsck_thread_debug       thread_debug =
@@ -3682,15 +3738,19 @@ static int e2fsck_pass1_threads_start(e2fsck_t global_ctx)
        global_ctx->infos = infos;
 
        average_group = ext2fs_get_avg_group(global_ctx->fs);
-       for (i = 0; i < num_threads; i++) {
-               tmp_pinfo = &infos[i];
-               tmp_pinfo->eti_thread_index = i;
+       average_inodes = ext2fs_get_avg_inodes(global_ctx->fs);
+       for (thread = 0, start_group = 0, inode_count = 0;
+            thread < num_threads; thread++) {
+               tmp_pinfo = &infos[thread];
+               tmp_pinfo->eti_thread_index = thread;
 #ifdef DEBUG_THREADS
                tmp_pinfo->eti_debug = &thread_debug;
 #endif
                retval = e2fsck_pass1_thread_prepare(global_ctx, &thread_ctx,
-                                                    i, num_threads,
-                                                    average_group);
+                                                    thread, average_group,
+                                                    average_inodes,
+                                                    &start_group, &inode_count,
+                                                    thread == num_threads - 1);
                if (retval) {
                        com_err(global_ctx->program_name, retval,
                                _("while preparing pass1 thread\n"));
@@ -3775,7 +3835,7 @@ void e2fsck_pass1(e2fsck_t ctx)
 
 /*
  * When the inode_scan routines call this callback at the end of the
- * glock group, call process_inodes.
+ * block group, call process_inodes.
  */
 static errcode_t scan_callback(ext2_filsys fs,
                               ext2_inode_scan scan EXT2FS_ATTR((unused)),
@@ -3823,11 +3883,11 @@ static errcode_t scan_callback(ext2_filsys fs,
 #ifdef HAVE_PTHREAD
        if (ctx->global_ctx) {
                tinfo = &ctx->thread_info;
-               tinfo->et_group_next++;
                if (ctx->options & E2F_OPT_DEBUG &&
                    ctx->options & E2F_OPT_MULTITHREAD)
                        log_out(ctx, _("group %d finished\n"),
                                tinfo->et_group_next);
+               tinfo->et_group_next++;
                if (tinfo->et_group_next >= tinfo->et_group_end)
                        return EXT2_ET_SCAN_FINISHED;
        }
index 4db68d9..e6b6d31 100644 (file)
@@ -1,8 +1,8 @@
 ext2fs_open2: Bad magic number in super-block
 ../e2fsck/e2fsck: Superblock invalid, trying backup blocks...
 Pass 1: Checking inodes, blocks, and sizes
-[Thread 0] Scan group range [0, 2)
-[Thread 0] Scanned group range [0, 2), inodes 3008
+[Thread 0] Scan group range [0, 2), inode_count = 11/3008
+[Thread 0] Scanned group range [0, 2), inodes 11/3008
 Pass 2: Checking directory structure
 Pass 3: Checking directory connectivity
 Pass 4: Checking reference counts
index 4db68d9..e6b6d31 100644 (file)
@@ -1,8 +1,8 @@
 ext2fs_open2: Bad magic number in super-block
 ../e2fsck/e2fsck: Superblock invalid, trying backup blocks...
 Pass 1: Checking inodes, blocks, and sizes
-[Thread 0] Scan group range [0, 2)
-[Thread 0] Scanned group range [0, 2), inodes 3008
+[Thread 0] Scan group range [0, 2), inode_count = 11/3008
+[Thread 0] Scanned group range [0, 2), inodes 11/3008
 Pass 2: Checking directory structure
 Pass 3: Checking directory connectivity
 Pass 4: Checking reference counts
index eda2fca..80f337d 100644 (file)
@@ -1,8 +1,8 @@
 ext2fs_open2: Bad magic number in super-block
 ../e2fsck/e2fsck: Superblock invalid, trying backup blocks...
 Pass 1: Checking inodes, blocks, and sizes
-[Thread 0] Scan group range [0, 2)
-[Thread 0] Scanned group range [0, 2), inodes 3008
+[Thread 0] Scan group range [0, 2), inode_count = 11/3008
+[Thread 0] Scanned group range [0, 2), inodes 11/3008
 Pass 2: Checking directory structure
 Pass 3: Checking directory connectivity
 Pass 4: Checking reference counts