Whamcloud - gitweb
libext2fs/e2fsck: provide routines to read-ahead metadata
[tools/e2fsprogs.git] / e2fsck / readahead.c
diff --git a/e2fsck/readahead.c b/e2fsck/readahead.c
new file mode 100644 (file)
index 0000000..4429a37
--- /dev/null
@@ -0,0 +1,252 @@
+/*
+ * readahead.c -- Prefetch filesystem metadata to speed up fsck.
+ *
+ * Copyright (C) 2014 Oracle.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+
+#include "config.h"
+#include <string.h>
+
+#include "e2fsck.h"
+
+/*
+ * Debug tracing is compiled out by default: DEBUG is explicitly undefined
+ * here, so dbg_printf() expands to nothing.  Turn the #undef below into a
+ * #define to make every dbg_printf() print to stdout and flush at once.
+ */
+#undef DEBUG
+
+#ifdef DEBUG
+# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
+#else
+# define dbg_printf(f, a...)
+#endif
+
+/*
+ * State carried between dblist iteration callbacks so that physically
+ * adjacent blocks can be merged into one readahead request.
+ */
+struct read_dblist {
+       errcode_t err;          /* result of the most recent readahead call */
+       blk64_t run_start;      /* first block of the run being accumulated */
+       blk64_t run_len;        /* blocks accumulated so far; 0 = no open run */
+       int flags;              /* caller-supplied E2FSCK_RA_DBLIST_* flags */
+};
+
+/*
+ * dblist iteration callback: fold one directory block entry into the
+ * current contiguous run, flushing the previous run to the readahead
+ * machinery whenever the new entry does not directly follow it.
+ *
+ * priv_data points at the struct read_dblist owned by the caller.
+ * Returns DBLIST_ABORT once a readahead call has failed, 0 otherwise.
+ */
+static int readahead_dir_block(ext2_filsys fs, struct ext2_db_entry2 *db,
+                              void *priv_data)
+{
+       struct read_dblist *state = priv_data;
+       e2_blkcnt_t nblocks;
+       int extends_run;
+
+       /* With IGNORE_BLOCKCNT every entry counts as exactly one block. */
+       if (state->flags & E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT)
+               nblocks = 1;
+       else
+               nblocks = db->blockcnt;
+
+       extends_run = state->run_len &&
+                     db->blk == state->run_start + state->run_len;
+       if (!extends_run) {
+               /* Flush whatever was accumulated before starting afresh. */
+               if (state->run_len) {
+                       state->err = io_channel_cache_readahead(fs->io,
+                                                       state->run_start,
+                                                       state->run_len);
+                       dbg_printf("readahead start=%llu len=%llu err=%d\n",
+                                  state->run_start, state->run_len,
+                                  (int)state->err);
+               }
+               state->run_start = db->blk;
+               state->run_len = 0;
+       }
+       state->run_len += nblocks;
+
+       if (state->err)
+               return DBLIST_ABORT;
+       return 0;
+}
+
+/*
+ * Issue readahead for the blocks named by a slice of a directory block
+ * list, coalescing adjacent blocks into single requests.
+ *
+ * flags  - E2FSCK_RA_DBLIST_* bits; any other bit is rejected with
+ *          EXT2_ET_INVALID_ARGUMENT.
+ * start, count - passed straight through to ext2fs_dblist_iterate3().
+ *
+ * Returns 0 on success, otherwise the first error seen: a failed
+ * readahead takes precedence over an error from the iterator itself.
+ */
+errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags,
+                                 ext2_dblist dblist,
+                                 unsigned long long start,
+                                 unsigned long long count)
+{
+       struct read_dblist state;
+       errcode_t iter_err;
+
+       dbg_printf("%s: flags=0x%x\n", __func__, flags);
+       if (flags & ~E2FSCK_RA_DBLIST_ALL_FLAGS)
+               return EXT2_ET_INVALID_ARGUMENT;
+
+       memset(&state, 0, sizeof(state));
+       state.flags = flags;
+       iter_err = ext2fs_dblist_iterate3(dblist, readahead_dir_block, start,
+                                         count, &state);
+       if (state.err)
+               return state.err;
+       if (iter_err)
+               return iter_err;
+
+       /* The callback only flushes on a discontinuity; flush the tail. */
+       if (!state.run_len)
+               return 0;
+       return io_channel_cache_readahead(fs->io, state.run_start,
+                                         state.run_len);
+}
+
+/*
+ * Walk ra_map and issue one readahead request per maximal run of set
+ * bits in the range [1, blocks_count - 1].
+ *
+ * Returns 0 on success (including "nothing marked"), or the first error
+ * reported by the bitmap search or by io_channel_cache_readahead().
+ */
+static errcode_t e2fsck_readahead_bitmap(ext2_filsys fs,
+                                        ext2fs_block_bitmap ra_map)
+{
+       blk64_t start, end, out;
+       errcode_t err;
+
+       start = 1;
+       end = ext2fs_blocks_count(fs->super) - 1;
+
+       err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, &out);
+       while (err == 0) {
+               start = out;
+               err = ext2fs_find_first_zero_block_bitmap2(ra_map, start, end,
+                                                          &out);
+               if (err == ENOENT) {
+                       /*
+                        * No clear bit in [start, end]: the run reaches the
+                        * last block.  Read it inclusively and stop here.
+                        * Looping again would find the set bit at "end"
+                        * once more, issue a zero-length readahead, and
+                        * never terminate.
+                        */
+                       err = io_channel_cache_readahead(fs->io, start,
+                                                        end - start + 1);
+                       break;
+               }
+               if (err)
+                       break;
+
+               /* "out" is the first clear block, so the run is [start, out) */
+               err = io_channel_cache_readahead(fs->io, start, out - start);
+               if (err)
+                       break;
+               start = out;
+               err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end,
+                                                         &out);
+       }
+
+       /* Running out of set bits is the normal way for the scan to end. */
+       if (err == ENOENT)
+               err = 0;
+
+       return err;
+}
+
+/*
+ * Mark num blocks starting at blk, but only after checking the range
+ * against the bitmap's bounds ourselves, so that a bad range produces an
+ * EXT2_ET_INVALID_ARGUMENT return instead of bitmap range error spew.
+ *
+ * NOTE(review): the upper check is blk + num <= end, whereas mark_bmap()
+ * accepts blk <= end.  If "end" is the last valid block, a range that
+ * finishes exactly on it is rejected here -- confirm this is intended.
+ */
+static errcode_t mark_bmap_range(ext2fs_block_bitmap map,
+                                blk64_t blk, unsigned int num)
+{
+       if (blk < ext2fs_get_generic_bmap_start(map) ||
+           blk + num > ext2fs_get_generic_bmap_end(map))
+               return EXT2_ET_INVALID_ARGUMENT;
+
+       ext2fs_mark_block_bitmap_range2(map, blk, num);
+       return 0;
+}
+
+/*
+ * Mark a single block in the bitmap.  A block outside the bitmap's
+ * [start, end] bounds is refused with EXT2_ET_INVALID_ARGUMENT rather
+ * than being handed to the bitmap code.
+ */
+static errcode_t mark_bmap(ext2fs_block_bitmap map, blk64_t blk)
+{
+       if (blk < ext2fs_get_generic_bmap_start(map) ||
+           blk > ext2fs_get_generic_bmap_end(map))
+               return EXT2_ET_INVALID_ARGUMENT;
+
+       ext2fs_mark_block_bitmap2(map, blk);
+       return 0;
+}
+
+/*
+ * Read ahead selected per-group metadata for block groups
+ * [start, start + ngroups), clamped to the filesystem's group count.
+ *
+ * flags selects what to fetch (E2FSCK_READA_SUPER, _GDT, _BBITMAP,
+ * _IBITMAP, _ITABLE); unknown bits yield EXT2_ET_INVALID_ARGUMENT and
+ * flags == 0 is a successful no-op.  The wanted blocks are first marked
+ * in a scratch block bitmap, which is then handed to
+ * e2fsck_readahead_bitmap() so adjacent blocks are fetched together.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start,
+                          dgrp_t ngroups)
+{
+       ext2fs_block_bitmap     ra_map = NULL;
+       blk64_t                 sb_blk, old_gdt, new_gdt, gdt, loc;
+       blk_t                   gdt_blocks, itable_blocks;
+       dgrp_t                  group;
+       dgrp_t                  stop = start + ngroups;
+       errcode_t               err = 0;
+
+       dbg_printf("%s: flags=0x%x start=%d groups=%d\n", __func__, flags,
+                  start, ngroups);
+       if (flags & ~E2FSCK_READA_ALL_FLAGS)
+               return EXT2_ET_INVALID_ARGUMENT;
+
+       if (stop > fs->group_desc_count)
+               stop = fs->group_desc_count;
+
+       if (flags == 0)
+               return 0;
+
+       err = ext2fs_allocate_block_bitmap(fs, "readahead bitmap",
+                                          &ra_map);
+       if (err)
+               return err;
+
+       for (group = start; group < stop; group++) {
+               err = ext2fs_super_and_bgd_loc2(fs, group, &sb_blk, &old_gdt,
+                                               &new_gdt, &gdt_blocks);
+               if (err)
+                       goto out;
+
+               if (flags & E2FSCK_READA_SUPER) {
+                       err = mark_bmap(ra_map, sb_blk);
+                       if (err)
+                               goto out;
+               }
+
+               if (flags & E2FSCK_READA_GDT) {
+                       /* Use the old-style GDT location when there is one. */
+                       if (old_gdt)
+                               gdt = old_gdt;
+                       else
+                               gdt = new_gdt;
+                       err = mark_bmap_range(ra_map, gdt, gdt_blocks);
+                       if (err)
+                               goto out;
+               }
+
+               /* Skip block bitmaps that are uninitialised or all-free. */
+               if ((flags & E2FSCK_READA_BBITMAP) &&
+                   !ext2fs_bg_flags_test(fs, group, EXT2_BG_BLOCK_UNINIT) &&
+                   ext2fs_bg_free_blocks_count(fs, group) <
+                               fs->super->s_blocks_per_group) {
+                       loc = ext2fs_block_bitmap_loc(fs, group);
+                       err = mark_bmap(ra_map, loc);
+                       if (err)
+                               goto out;
+               }
+
+               /* Likewise for inode bitmaps. */
+               if ((flags & E2FSCK_READA_IBITMAP) &&
+                   !ext2fs_bg_flags_test(fs, group, EXT2_BG_INODE_UNINIT) &&
+                   ext2fs_bg_free_inodes_count(fs, group) <
+                               fs->super->s_inodes_per_group) {
+                       loc = ext2fs_inode_bitmap_loc(fs, group);
+                       err = mark_bmap(ra_map, loc);
+                       if (err)
+                               goto out;
+               }
+
+               /*
+                * Inode table: only for groups with at least one inode in
+                * use, and leaving out the blocks covered by the group's
+                * unused-inode count.
+                */
+               if ((flags & E2FSCK_READA_ITABLE) &&
+                   ext2fs_bg_free_inodes_count(fs, group) <
+                               fs->super->s_inodes_per_group) {
+                       loc = ext2fs_inode_table_loc(fs, group);
+                       itable_blocks = fs->inode_blocks_per_group -
+                                (ext2fs_bg_itable_unused(fs, group) *
+                                 EXT2_INODE_SIZE(fs->super) / fs->blocksize);
+                       err = mark_bmap_range(ra_map, loc, itable_blocks);
+                       if (err)
+                               goto out;
+               }
+       }
+
+       err = e2fsck_readahead_bitmap(fs, ra_map);
+
+out:
+       ext2fs_free_block_bitmap(ra_map);
+       return err;
+}
+
+/*
+ * Probe whether the filesystem's I/O channel supports readahead by
+ * requesting a one-block readahead at block 0.
+ *
+ * Returns nonzero unless the probe fails with EXT2_ET_OP_NOT_SUPPORTED;
+ * any other outcome, including other errors, counts as "supported".
+ */
+int e2fsck_can_readahead(ext2_filsys fs)
+{
+       errcode_t probe = io_channel_cache_readahead(fs->io, 0, 1);
+       int supported = (probe != EXT2_ET_OP_NOT_SUPPORTED);
+
+       dbg_printf("%s: supp=%d\n", __func__, supported);
+       return supported;
+}
+
+/*
+ * Suggest a readahead size for this filesystem.
+ *
+ * Returns the suggestion divided by 1024 (KiB, assuming fs->blocksize is
+ * in bytes), or 0 when readahead should be disabled because the buffer
+ * would be too large relative to get_memory_size().
+ */
+unsigned long long e2fsck_guess_readahead(ext2_filsys fs)
+{
+       unsigned long long guess;
+
+       /*
+        * The optimal readahead sizes were experimentally determined by
+        * djwong in August 2014.  Setting the RA size to two block groups'
+        * worth of inode table blocks seems to yield the largest reductions
+        * in e2fsck runtime.
+        *
+        * The 2ULL forces the whole product to be computed in 64 bits;
+        * otherwise it is evaluated in the (narrower) type of the fs
+        * fields and can wrap before being widened by the assignment.
+        */
+       guess = 2ULL * fs->blocksize * fs->inode_blocks_per_group;
+
+       /* Disable RA if it'd use more than 1/50th of RAM. */
+       if (get_memory_size() > (guess * 50))
+               return guess / 1024;
+
+       return 0;
+}