Whamcloud - gitweb
ext2fs: parallel bitmap loading
author Wang Shilong <wshilong@ddn.com>
Thu, 14 Jan 2021 00:27:22 +0000 (16:27 -0800)
committer Theodore Ts'o <tytso@mit.edu>
Thu, 21 Jan 2021 15:50:40 +0000 (10:50 -0500)
In our benchmarking of a PiB-size filesystem, pass5 takes
10446s to finish, and 99.5% of that time is spent reading bitmaps.

It makes sense to read the bitmaps using multiple threads;
a quick benchmark shows a drop from 10446s to 626s with 64 threads.

[ This has all of the many bug fixes for rw_bitmaps.c from the original
  Lustre patch set collapsed into a single commit.  In addition, it has
  the new ext2fs_rw_bitmaps() API proposed by Ted. ]

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Signed-off-by: Saranya Muruganandam <saranyamohan@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
lib/ext2fs/ext2fs.h
lib/ext2fs/rw_bitmaps.c

index 5955c3a..82ce912 100644 (file)
@@ -689,6 +689,14 @@ struct ext2_xattr_handle;
 #define XATTR_CHANGED  2
 
 /*
+ * flags for ext2fs_rw_bitmaps()
+ */
+#define EXT2FS_BITMAPS_WRITE           0x0001
+#define EXT2FS_BITMAPS_BLOCK           0x0002
+#define EXT2FS_BITMAPS_INODE           0x0004
+#define EXT2FS_BITMAPS_VALID_FLAGS     0x0007
+
+/*
  * function prototypes
  */
 static inline int ext2fs_has_group_desc_csum(ext2_filsys fs)
index d80c9eb..7e4f7c6 100644 (file)
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
+#ifdef HAVE_PTHREAD_H
+#include <pthread.h>
+#endif
 
 #include "ext2_fs.h"
 #include "ext2fs.h"
 #include "e2image.h"
 
+#ifdef HAVE_PTHREAD
+typedef pthread_mutex_t mutex_t;
+
+/*
+ * Lock *mutex.  A NULL pointer is accepted and turns the call into a
+ * no-op, so callers can pass NULL when no locking is wanted.
+ */
+static void unix_pthread_mutex_lock(mutex_t *mutex)
+{
+       if (!mutex)
+               return;
+       pthread_mutex_lock(mutex);
+}
+
+/*
+ * Unlock *mutex.  A NULL pointer is accepted and turns the call into a
+ * no-op, mirroring unix_pthread_mutex_lock().
+ */
+static void unix_pthread_mutex_unlock(mutex_t *mutex)
+{
+       if (!mutex)
+               return;
+       pthread_mutex_unlock(mutex);
+}
+#else
+/* Without pthreads the lock/unlock helpers expand to empty statements. */
+typedef int mutex_t;
+#define unix_pthread_mutex_lock(m) do {} while (0)
+#define unix_pthread_mutex_unlock(m) do {} while (0)
+#endif
+
 static errcode_t write_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 {
        dgrp_t          i;
@@ -205,22 +227,12 @@ static int bitmap_tail_verify(unsigned char *bitmap, int first, int last)
        return 1;
 }
 
-static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
+static errcode_t read_bitmaps_range_prepare(ext2_filsys fs, int flags)
 {
-       dgrp_t i;
-       char *block_bitmap = 0, *inode_bitmap = 0;
-       char *buf;
        errcode_t retval;
        int block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
        int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8;
-       int tail_flags = 0;
-       int csum_flag;
-       unsigned int    cnt;
-       blk64_t blk;
-       blk64_t blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
-       blk64_t   blk_cnt;
-       ext2_ino_t ino_itr = 1;
-       ext2_ino_t ino_cnt;
+       char *buf;
 
        EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
 
@@ -230,12 +242,11 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 
        fs->write_bitmaps = ext2fs_write_bitmaps;
 
-       csum_flag = ext2fs_has_group_desc_csum(fs);
-
        retval = ext2fs_get_mem(strlen(fs->device_name) + 80, &buf);
        if (retval)
                return retval;
-       if (do_block) {
+
+       if (flags & EXT2FS_BITMAPS_BLOCK) {
                if (fs->block_map)
                        ext2fs_free_block_bitmap(fs->block_map);
                strcpy(buf, "block bitmap for ");
@@ -243,12 +254,9 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
                retval = ext2fs_allocate_block_bitmap(fs, buf, &fs->block_map);
                if (retval)
                        goto cleanup;
-               retval = io_channel_alloc_buf(fs->io, 0, &block_bitmap);
-               if (retval)
-                       goto cleanup;
-       } else
-               block_nbytes = 0;
-       if (do_inode) {
+       }
+
+       if (flags & EXT2FS_BITMAPS_INODE) {
                if (fs->inode_map)
                        ext2fs_free_inode_bitmap(fs->inode_map);
                strcpy(buf, "inode bitmap for ");
@@ -256,13 +264,62 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
                retval = ext2fs_allocate_inode_bitmap(fs, buf, &fs->inode_map);
                if (retval)
                        goto cleanup;
+       }
+       ext2fs_free_mem(&buf);
+
+       return retval;
+
+cleanup:
+       if (flags & EXT2FS_BITMAPS_BLOCK) {
+               ext2fs_free_block_bitmap(fs->block_map);
+               fs->block_map = 0;
+       }
+       if (flags & EXT2FS_BITMAPS_INODE) {
+               ext2fs_free_inode_bitmap(fs->inode_map);
+               fs->inode_map = 0;
+       }
+       if (buf)
+               ext2fs_free_mem(&buf);
+       return retval;
+}
+
+static errcode_t read_bitmaps_range_start(ext2_filsys fs, int flags,
+                                         dgrp_t start, dgrp_t end,
+                                         mutex_t *mutex,
+                                         int *tail_flags)
+{
+       dgrp_t i;
+       char *block_bitmap = 0, *inode_bitmap = 0;
+       errcode_t retval = 0;
+       int block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
+       int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8;
+       int csum_flag;
+       unsigned int    cnt;
+       blk64_t blk;
+       blk64_t blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
+       blk64_t   blk_cnt;
+       ext2_ino_t ino_itr = 1;
+       ext2_ino_t ino_cnt;
+
+       csum_flag = ext2fs_has_group_desc_csum(fs);
+
+       if (flags & EXT2FS_BITMAPS_BLOCK) {
+               retval = io_channel_alloc_buf(fs->io, 0, &block_bitmap);
+               if (retval)
+                       goto cleanup;
+       } else {
+               block_nbytes = 0;
+       }
+
+       if (flags & EXT2FS_BITMAPS_INODE) {
                retval = io_channel_alloc_buf(fs->io, 0, &inode_bitmap);
                if (retval)
                        goto cleanup;
-       } else
+       } else {
                inode_nbytes = 0;
-       ext2fs_free_mem(&buf);
+       }
 
+       /* io should be null */
        if (fs->flags & EXT2_FLAG_IMAGE_FILE) {
                blk = (ext2fs_le32_to_cpu(fs->image_header->offset_inodemap) / fs->blocksize);
                ino_cnt = fs->super->s_inodes_count;
@@ -300,10 +357,12 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
                        blk_itr += cnt;
                        blk_cnt -= cnt;
                }
-               goto success_cleanup;
+               goto cleanup;
        }
 
-       for (i = 0; i < fs->group_desc_count; i++) {
+       blk_itr += ((blk64_t)start * (block_nbytes << 3));
+       ino_itr += ((blk64_t)start * (inode_nbytes << 3));
+       for (i = start; i <= end; i++) {
                if (block_bitmap) {
                        blk = ext2fs_block_bitmap_loc(fs, i);
                        if ((csum_flag &&
@@ -329,12 +388,14 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
                                }
                                if (!bitmap_tail_verify((unsigned char *) block_bitmap,
                                                        block_nbytes, fs->blocksize - 1))
-                                       tail_flags |= EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+                                       *tail_flags |= EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
                        } else
                                memset(block_bitmap, 0, block_nbytes);
                        cnt = block_nbytes << 3;
+                       unix_pthread_mutex_lock(mutex);
                        retval = ext2fs_set_block_bitmap_range2(fs->block_map,
                                               blk_itr, cnt, block_bitmap);
+                       unix_pthread_mutex_unlock(mutex);
                        if (retval)
                                goto cleanup;
                        blk_itr += block_nbytes << 3;
@@ -365,63 +426,225 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
                                }
                                if (!bitmap_tail_verify((unsigned char *) inode_bitmap,
                                                        inode_nbytes, fs->blocksize - 1))
-                                       tail_flags |= EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
+                                       *tail_flags |= EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
                        } else
                                memset(inode_bitmap, 0, inode_nbytes);
                        cnt = inode_nbytes << 3;
+                       unix_pthread_mutex_lock(mutex);
                        retval = ext2fs_set_inode_bitmap_range2(fs->inode_map,
                                               ino_itr, cnt, inode_bitmap);
+                       unix_pthread_mutex_unlock(mutex);
                        if (retval)
                                goto cleanup;
                        ino_itr += inode_nbytes << 3;
                }
        }
 
+cleanup:
+       if (inode_bitmap)
+               ext2fs_free_mem(&inode_bitmap);
+       if (block_bitmap)
+               ext2fs_free_mem(&block_bitmap);
+       return retval;
+}
+
+static errcode_t read_bitmaps_range_end(ext2_filsys fs, int flags,
+                                       int tail_flags)
+{
+       errcode_t retval;
+
        /* Mark group blocks for any BLOCK_UNINIT groups */
-       if (do_block) {
+       if (flags & EXT2FS_BITMAPS_BLOCK) {
                retval = mark_uninit_bg_group_blocks(fs);
                if (retval)
-                       goto cleanup;
-       }
-
-success_cleanup:
-       if (inode_bitmap) {
-               ext2fs_free_mem(&inode_bitmap);
-               fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
-       }
-       if (block_bitmap) {
-               ext2fs_free_mem(&block_bitmap);
+                       return retval;
                fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
        }
+       if (flags & EXT2FS_BITMAPS_INODE)
+               fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
        fs->flags |= tail_flags;
+
        return 0;
+}
 
-cleanup:
-       if (do_block) {
+static void read_bitmaps_cleanup_on_error(ext2_filsys fs, int flags)
+{
+       if (flags & EXT2FS_BITMAPS_BLOCK) {
                ext2fs_free_block_bitmap(fs->block_map);
                fs->block_map = 0;
        }
-       if (do_inode) {
+       if (flags & EXT2FS_BITMAPS_INODE) {
                ext2fs_free_inode_bitmap(fs->inode_map);
                fs->inode_map = 0;
        }
-       if (inode_bitmap)
-               ext2fs_free_mem(&inode_bitmap);
-       if (block_bitmap)
-               ext2fs_free_mem(&block_bitmap);
-       if (buf)
-               ext2fs_free_mem(&buf);
+}
+
+/*
+ * Single-threaded bitmap read for groups start..end (inclusive).
+ *
+ * Runs the three phases in order: prepare, start, end.  If the start or
+ * end phase fails, the bitmaps named in flags are released through
+ * read_bitmaps_cleanup_on_error().  A prepare failure is returned
+ * directly.  Returns 0 on success or the first error seen.
+ */
+static errcode_t read_bitmaps_range(ext2_filsys fs, int flags,
+                                   dgrp_t start, dgrp_t end)
+{
+       int tail_flags = 0;
+       errcode_t err;
+
+       err = read_bitmaps_range_prepare(fs, flags);
+       if (err)
+               return err;
+
+       /* NULL mutex: the lock/unlock helpers become no-ops. */
+       err = read_bitmaps_range_start(fs, flags, start, end,
+                                      NULL, &tail_flags);
+       if (!err)
+               err = read_bitmaps_range_end(fs, flags, tail_flags);
+       if (!err)
+               return 0;
+
+       read_bitmaps_cleanup_on_error(fs, flags);
+       return err;
+}
+
+#ifdef HAVE_PTHREAD
+/* Work item handed to one bitmap-reading thread. */
+struct read_bitmaps_thread_info {
+       ext2_filsys     rbt_fs;         /* filesystem being read */
+       int             rbt_flags;      /* EXT2FS_BITMAPS_* selection */
+       dgrp_t          rbt_grp_start;  /* first group, inclusive */
+       dgrp_t          rbt_grp_end;    /* last group, inclusive */
+       errcode_t       rbt_retval;     /* result stored by the thread */
+       pthread_mutex_t *rbt_mutex;     /* passed to the range reader */
+       int             rbt_tail_flags; /* tail-problem flags found */
+};
+
+/*
+ * pthread start routine: read the group range described by data and
+ * store the result in rbt_retval.  The thread's return value is always
+ * NULL.
+ */
+static void *read_bitmaps_thread(void *data)
+{
+       struct read_bitmaps_thread_info *info = data;
+       errcode_t err;
+
+       err = read_bitmaps_range_start(info->rbt_fs, info->rbt_flags,
+                                      info->rbt_grp_start,
+                                      info->rbt_grp_end,
+                                      info->rbt_mutex,
+                                      &info->rbt_tail_flags);
+       info->rbt_retval = err;
+       return NULL;
+}
+#endif
+
+/*
+ * Read or write the block and/or inode bitmaps of fs.
+ *
+ * flags       - combination of EXT2FS_BITMAPS_BLOCK, EXT2FS_BITMAPS_INODE
+ *               and EXT2FS_BITMAPS_WRITE; any other bit yields
+ *               EXT2_ET_INVALID_ARGUMENT.  With EXT2FS_BITMAPS_WRITE set
+ *               the selected bitmaps are written and num_threads is
+ *               ignored.
+ * num_threads - number of reader threads; a negative value asks for an
+ *               automatic choice.
+ *
+ * The read falls back to the single-threaded path when built without
+ * pthreads, when the io channel lacks CHANNEL_FLAGS_THREADS, for image
+ * files, when fewer than two threads would be used, or when there are
+ * too few groups to give every thread at least one.
+ *
+ * Returns 0 on success or an error code.
+ */
+errcode_t ext2fs_rw_bitmaps(ext2_filsys fs, int flags, int num_threads)
+{
+#ifdef HAVE_PTHREAD
+       pthread_attr_t  attr;
+       pthread_t *thread_ids = NULL;
+       struct read_bitmaps_thread_info *thread_infos = NULL;
+       pthread_mutex_t rbt_mutex = PTHREAD_MUTEX_INITIALIZER;
+       errcode_t retval;
+       errcode_t rc;
+       unsigned int log_flex;
+       unsigned int flexbg_size;
+       dgrp_t average_group;
+       int i, tail_flags = 0;
+       int started = 0;        /* threads successfully created */
+       int prepared = 0;       /* bitmaps were (re)allocated */
+#endif
+
+       if (flags & ~EXT2FS_BITMAPS_VALID_FLAGS)
+               return EXT2_ET_INVALID_ARGUMENT;
+
+       if (flags & EXT2FS_BITMAPS_WRITE)
+               return write_bitmaps(fs, flags & EXT2FS_BITMAPS_INODE,
+                                    flags & EXT2FS_BITMAPS_BLOCK);
+
+#ifdef HAVE_PTHREAD
+       if (((fs->io->flags & CHANNEL_FLAGS_THREADS) == 0) ||
+           (num_threads == 1) || (fs->flags & EXT2_FLAG_IMAGE_FILE))
+               goto fallback;
+
+       if (num_threads < 0) {
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF)
+               num_threads = sysconf(_SC_NPROCESSORS_CONF);
+#else
+               /*
+                * Guess for now; eventually we should probably define
+                * ext2fs_get_num_cpus() and teach it how to get this info on
+                * MacOS, FreeBSD, etc.
+                * ref: https://stackoverflow.com/questions/150355
+                */
+               num_threads = 4;
+#endif /* HAVE_SYSCONF */
+       }
+       /*
+        * A caller-supplied 0 would divide by zero below, and a failed
+        * sysconf() (-1) would turn into a huge value in the unsigned
+        * comparison with the group count.
+        */
+       if (num_threads <= 0)
+               goto fallback;
+       if ((dgrp_t) num_threads > fs->group_desc_count)
+               num_threads = fs->group_desc_count;
+       /* Zero or one thread gains nothing over the serial path. */
+       if (num_threads <= 1)
+               goto fallback;
+
+       average_group = fs->group_desc_count / num_threads;
+       if (ext2fs_has_feature_flex_bg(fs->super)) {
+               /* The shift count comes from disk; keep it in range. */
+               log_flex = fs->super->s_log_groups_per_flex;
+               if (log_flex >= 8 * sizeof(flexbg_size))
+                       goto fallback;
+               flexbg_size = 1U << log_flex;
+               average_group = (average_group / flexbg_size) * flexbg_size;
+       }
+       if (average_group == 0)
+               goto fallback;
+
+       io_channel_set_options(fs->io, "cache=off");
+       retval = pthread_attr_init(&attr);
+       if (retval)
+               goto out_cache;
+
+       thread_ids = calloc(num_threads, sizeof(*thread_ids));
+       thread_infos = calloc(num_threads, sizeof(*thread_infos));
+       if (!thread_ids || !thread_infos) {
+               retval = ENOMEM;
+               goto out;
+       }
+
+       retval = read_bitmaps_range_prepare(fs, flags);
+       if (retval)
+               goto out;
+       prepared = 1;
+
+       /*
+        * Thread i covers groups [average_group * i,
+        * average_group * (i + 1) - 1]; the last thread also takes the
+        * remainder.  Ranges start on a multiple of average_group, so
+        * with flex_bg they start on a flex group boundary.
+        */
+       for (i = 0; i < num_threads; i++) {
+               thread_infos[i].rbt_fs = fs;
+               thread_infos[i].rbt_flags = flags;
+               thread_infos[i].rbt_mutex = &rbt_mutex;
+               thread_infos[i].rbt_tail_flags = 0;
+               thread_infos[i].rbt_grp_start = average_group * i;
+               if (i == num_threads - 1)
+                       thread_infos[i].rbt_grp_end = fs->group_desc_count - 1;
+               else
+                       thread_infos[i].rbt_grp_end =
+                               average_group * (i + 1) - 1;
+               retval = pthread_create(&thread_ids[i], &attr,
+                                       &read_bitmaps_thread, &thread_infos[i]);
+               if (retval)
+                       break;
+               started++;
+       }
+       /* Join only the threads that were actually created. */
+       for (i = 0; i < started; i++) {
+               rc = pthread_join(thread_ids[i], NULL);
+               if (rc && !retval)
+                       retval = rc;
+               rc = thread_infos[i].rbt_retval;
+               if (rc && !retval)
+                       retval = rc;
+               tail_flags |= thread_infos[i].rbt_tail_flags;
+       }
+out:
+       rc = pthread_attr_destroy(&attr);
+       if (rc && !retval)
+               retval = rc;
+       free(thread_infos);
+       free(thread_ids);
+
+       if (retval == 0)
+               retval = read_bitmaps_range_end(fs, flags, tail_flags);
+       /* Bitmaps are only ours to release once prepare has replaced them. */
+       if (retval && prepared)
+               read_bitmaps_cleanup_on_error(fs, flags);
+out_cache:
+       /* XXX should save and restore cache setting */
+       io_channel_set_options(fs->io, "cache=on");
        return retval;
+fallback:
+#endif /* HAVE_PTHREAD */
+       return read_bitmaps_range(fs, flags, 0, fs->group_desc_count - 1);
 }
 
 errcode_t ext2fs_read_inode_bitmap(ext2_filsys fs)
 {
-       return read_bitmaps(fs, 1, 0);
+       return ext2fs_rw_bitmaps(fs, EXT2FS_BITMAPS_INODE, -1);
 }
 
 errcode_t ext2fs_read_block_bitmap(ext2_filsys fs)
 {
-       return read_bitmaps(fs, 0, 1);
+       return ext2fs_rw_bitmaps(fs, EXT2FS_BITMAPS_BLOCK, -1);
 }
 
 errcode_t ext2fs_write_inode_bitmap(ext2_filsys fs)
@@ -436,10 +659,15 @@ errcode_t ext2fs_write_block_bitmap (ext2_filsys fs)
 
 errcode_t ext2fs_read_bitmaps(ext2_filsys fs)
 {
-       if (fs->inode_map && fs->block_map)
-               return 0;
+       int flags = 0;
 
-       return read_bitmaps(fs, !fs->inode_map, !fs->block_map);
+       if (!fs->inode_map)
+               flags |= EXT2FS_BITMAPS_INODE;
+       if (!fs->block_map)
+               flags |= EXT2FS_BITMAPS_BLOCK;
+       if (flags == 0)
+               return 0;
+       return ext2fs_rw_bitmaps(fs, flags, -1);
 }
 
 errcode_t ext2fs_write_bitmaps(ext2_filsys fs)