Whamcloud - gitweb
tune2fs: Support for large inode migration.
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Mon, 13 Aug 2007 10:26:24 +0000 (15:56 +0530)
committerTheodore Ts'o <tytso@mit.edu>
Mon, 28 Apr 2008 00:06:14 +0000 (20:06 -0400)
Add new option -I <inode_size> to tune2fs.  This is used to change the
inode size. The size need to be multiple of 2 and we don't allow to
decrease the inode size.

As a part of increasing the inode size we increase the inode table
size. We also move the used data blocks around and update the
respective inodes to point to the new block

tune2fs uses the undo I/O manager when migrating to large inode. This
helps in reverting the changes if end results are not correct.  The
environment variable TUNE2FS_UNDO_DIR is used to indicate the
directory within which the tdb file need to be created. The file will
be named tune2fs-<device-name> If TUNE2FS_UNDO_DIR is not set
/var/lib/e2fsprogs is used

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
misc/tune2fs.c

index 85046e2..527991a 100644 (file)
@@ -42,6 +42,7 @@ extern int optind;
 #include <time.h>
 #include <unistd.h>
 #include <sys/types.h>
+#include <libgen.h>
 
 #include "ext2fs/ext2_fs.h"
 #include "ext2fs/ext2fs.h"
@@ -71,6 +72,7 @@ char * new_label, *new_last_mounted, *new_UUID;
 char * io_options;
 static int c_flag, C_flag, e_flag, f_flag, g_flag, i_flag, l_flag, L_flag;
 static int m_flag, M_flag, r_flag, s_flag = -1, u_flag, U_flag, T_flag;
+static int I_flag;
 static time_t last_check_time;
 static int print_label;
 static int max_mount_count, mount_count, mount_flags;
@@ -84,10 +86,20 @@ static char *mntopts_cmd;
 static int stride, stripe_width;
 static int stride_set, stripe_width_set;
 static char *extended_cmd;
+static unsigned long int new_inode_size;
 
 int journal_size, journal_flags;
 char *journal_device;
 
+static struct list_head blk_move_list;
+
+struct blk_move {
+       struct list_head list;
+       blk_t old_loc;
+       blk_t new_loc;
+};
+
+
 static const char *please_fsck = N_("Please run e2fsck on the filesystem.\n");
 
 void do_findfs(int argc, char **argv);
@@ -104,7 +116,7 @@ static void usage(void)
                  "[-L volume_label]\n"
                  "\t[-M last_mounted_dir] [-O [^]feature[,...]]\n"
                  "\t[-E extended-option[,...]] [-T last_check_time] "
-                 "[-U UUID] device\n"), program_name);
+                 "[-U UUID]\n\t[ -I new_inode_size ] device\n"), program_name);
        exit (1);
 }
 
@@ -571,7 +583,7 @@ static void parse_tune2fs_options(int argc, char **argv)
        open_flag = EXT2_FLAG_SOFTSUPP_FEATURES;
 
        printf("tune2fs %s (%s)\n", E2FSPROGS_VERSION, E2FSPROGS_DATE);
-       while ((c = getopt(argc, argv, "c:e:fg:i:jlm:o:r:s:u:C:E:J:L:M:O:T:U:")) != EOF)
+       while ((c = getopt(argc, argv, "c:e:fg:i:jlm:o:r:s:u:C:E:I:J:L:M:O:T:U:")) != EOF)
                switch (c)
                {
                        case 'c':
@@ -772,6 +784,25 @@ static void parse_tune2fs_options(int argc, char **argv)
                                open_flag = EXT2_FLAG_RW |
                                        EXT2_FLAG_JOURNAL_DEV_OK;
                                break;
+                       case 'I':
+                               new_inode_size = strtoul (optarg, &tmp, 0);
+                               if (*tmp) {
+                                       com_err (program_name, 0,
+                                               _("bad inode size - %s"),
+                                                       optarg);
+                                       usage();
+                               }
+                               if (!((new_inode_size &
+                                               (new_inode_size - 1)) == 0)) {
+                                       com_err (program_name, 0,
+                                               _("Inode size must be a "
+                                               "power of two- %s"),
+                                                       optarg);
+                                       usage();
+                               }
+                               open_flag = EXT2_FLAG_RW;
+                               I_flag = 1;
+                               break;
                        default:
                                usage();
                }
@@ -897,6 +928,465 @@ static void parse_extended_opts(ext2_filsys fs, const char *opts)
        free(buf);
 }      
 
+static int get_move_bitmap(ext2_filsys fs, int new_ino_blks_per_grp,
+                                       ext2fs_block_bitmap bmap)
+{
+       dgrp_t i;
+       blk_t j, needed_blocks = 0;
+       blk_t start_blk, end_blk;
+
+       for (i = 0; i < fs->group_desc_count; i++) {
+
+               start_blk = fs->group_desc[i].bg_inode_table +
+                                       fs->inode_blocks_per_group;
+
+               end_blk = fs->group_desc[i].bg_inode_table +
+                                       new_ino_blks_per_grp;
+
+               for (j = start_blk; j < end_blk; j++) {
+
+                       if (ext2fs_test_block_bitmap(fs->block_map, j)) {
+                               /* FIXME!!
+                                * What happens if the block is marked
+                                * as a bad block
+                                */
+                               ext2fs_mark_block_bitmap(bmap, j);
+                               needed_blocks++;
+                       } else {
+                               /*
+                                * We are going to use this block for
+                                * inode table. So mark them used.
+                                */
+                               ext2fs_mark_block_bitmap(fs->block_map, j);
+                       }
+               }
+       }
+
+       if (needed_blocks > fs->super->s_free_blocks_count ) {
+               return ENOSPC;
+       }
+
+       return 0;
+}
+
+static int move_block(ext2_filsys fs, ext2fs_block_bitmap bmap)
+{
+       char *buf;
+       errcode_t retval;
+       blk_t blk, new_blk;
+       struct blk_move *bmv;
+
+
+       retval = ext2fs_get_mem(fs->blocksize, &buf);
+       if (retval)
+               return retval;
+
+       for (blk = fs->super->s_first_data_block;
+                       blk < fs->super->s_blocks_count; blk++) {
+
+               if (!ext2fs_test_block_bitmap(bmap, blk))
+                       continue;
+
+               retval = ext2fs_new_block(fs, blk, NULL, &new_blk);
+               if (retval)
+                       goto err_out;
+
+               /* Mark this block as allocated */
+               ext2fs_mark_block_bitmap(fs->block_map, new_blk);
+
+               /* Add it to block move list */
+               retval = ext2fs_get_mem(sizeof(struct blk_move), &bmv);
+               if (retval)
+                       goto err_out;
+
+               bmv->old_loc = blk;
+               bmv->new_loc = new_blk;
+
+               list_add(&(bmv->list), &blk_move_list);
+
+               retval = io_channel_read_blk(fs->io, blk, 1, buf);
+               if (retval)
+                       goto err_out;
+
+               retval = io_channel_write_blk(fs->io, new_blk, 1, buf);
+               if (retval)
+                       goto err_out;
+       }
+
+err_out:
+       ext2fs_free_mem(&buf);
+       return retval;
+}
+
+static blk_t transalate_block(blk_t blk)
+{
+       struct list_head *entry;
+       struct blk_move *bmv;
+
+       list_for_each(entry, &blk_move_list) {
+
+               bmv = list_entry(entry, struct blk_move, list);
+               if (bmv->old_loc == blk)
+                       return bmv->new_loc;
+       }
+
+       return 0;
+}
+
+static int process_block(ext2_filsys fs, blk_t *block_nr,
+                        e2_blkcnt_t blockcnt,
+                        blk_t ref_block EXT2FS_ATTR((unused)),
+                        int ref_offset EXT2FS_ATTR((unused)),
+                        void *priv_data EXT2FS_ATTR((unused)))
+{
+       int ret = 0;
+       blk_t new_blk;
+
+
+       new_blk = transalate_block(*block_nr);
+       if (new_blk) {
+               *block_nr = new_blk;
+               /*
+                * This will force the ext2fs_write_inode in the iterator
+                */
+               ret |= BLOCK_CHANGED;
+       }
+
+       return ret;
+}
+
+static int inode_scan_and_fix(ext2_filsys fs)
+{
+       errcode_t retval = 0;
+       ext2_ino_t ino;
+       blk_t blk;
+       char *block_buf = 0;
+       struct ext2_inode inode;
+       ext2_inode_scan scan = NULL;
+
+       retval = ext2fs_get_mem(fs->blocksize * 3, &block_buf);
+       if (retval)
+               return retval;
+
+       retval = ext2fs_open_inode_scan(fs, 0, &scan);
+       if (retval)
+               goto err_out;
+
+       while (1) {
+
+               retval = ext2fs_get_next_inode(scan, &ino, &inode);
+               if (retval)
+                       goto err_out;
+
+               if (!ino)
+                       break;
+
+               if (inode.i_links_count == 0)
+                       continue; /* inode not in use */
+
+               /* FIXME!!
+                * If we end up modifying the journal inode
+                * the sb->s_jnl_blocks will differ. But a
+                * subsequent e2fsck fixes that.
+                * Do we need to fix this ??
+                */
+
+               if (inode.i_file_acl) {
+
+                       blk = transalate_block(inode.i_file_acl);
+                       if (!blk)
+                               continue;
+
+                       inode.i_file_acl = blk;
+
+                       /*
+                        * Write the inode to disk so that inode table
+                        * resizing can work
+                        */
+                       retval = ext2fs_write_inode(fs, ino, &inode);
+                       if (retval)
+                               goto err_out;
+               }
+
+               if (!ext2fs_inode_has_valid_blocks(&inode))
+                       continue;
+
+               retval = ext2fs_block_iterate2(fs, ino, 0,
+                                               block_buf, process_block,
+                                               0);
+               if (retval)
+                       goto err_out;
+
+       }
+
+err_out:
+       ext2fs_free_mem(&block_buf);
+
+       return retval;
+
+}
+
+static int expand_inode_table(ext2_filsys fs, unsigned long int new_inode_size)
+{
+       dgrp_t i;
+       blk_t blk;
+       errcode_t retval;
+       int new_ino_blks_per_grp, j;
+       char *old_itable = NULL, *new_itable = NULL;
+       char *tmp_old_itable = NULL, *tmp_new_itable = NULL;
+       unsigned long int old_inode_size;
+       int old_itable_size, new_itable_size;
+
+       old_itable_size = fs->inode_blocks_per_group * fs->blocksize;
+       old_inode_size = EXT2_INODE_SIZE(fs->super);
+
+       new_ino_blks_per_grp = ext2fs_div_ceil(
+                                       EXT2_INODES_PER_GROUP(fs->super) *
+                                       new_inode_size,
+                                       fs->blocksize);
+
+       new_itable_size = new_ino_blks_per_grp * fs->blocksize;
+
+       retval = ext2fs_get_mem(old_itable_size, &old_itable);
+       if (retval)
+               return retval;
+
+       retval = ext2fs_get_mem(new_itable_size, &new_itable);
+       if (retval)
+               goto err_out;
+
+       tmp_old_itable = old_itable;
+       tmp_new_itable = new_itable;
+
+       for (i = 0; i < fs->group_desc_count; i++) {
+
+               blk = fs->group_desc[i].bg_inode_table;
+               retval = io_channel_read_blk(fs->io, blk,
+                               fs->inode_blocks_per_group, old_itable);
+               if (retval)
+                       goto err_out;
+
+               for (j = 0; j < EXT2_INODES_PER_GROUP(fs->super); j++) {
+
+                       memcpy(new_itable, old_itable, old_inode_size);
+
+                       memset(new_itable+old_inode_size, 0,
+                                       new_inode_size - old_inode_size);
+
+                       new_itable +=  new_inode_size;
+                       old_itable += old_inode_size;
+               }
+
+               /* reset the pointer */
+               old_itable = tmp_old_itable;
+               new_itable = tmp_new_itable;
+
+               retval = io_channel_write_blk(fs->io, blk,
+                                       new_ino_blks_per_grp, new_itable);
+               if (retval)
+                       goto err_out;
+       }
+
+       /* Update the meta data */
+       fs->inode_blocks_per_group = new_ino_blks_per_grp;
+       fs->super->s_inode_size = new_inode_size;
+
+err_out:
+       if (old_itable)
+               ext2fs_free_mem(&old_itable);
+
+       if (new_itable)
+               ext2fs_free_mem(&new_itable);
+
+       return retval;
+
+}
+
+static errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs)
+{
+       blk_t           blk;
+       ext2_ino_t      ino;
+       unsigned int    group = 0;
+       unsigned int    count = 0;
+       int             total_free = 0;
+       int             group_free = 0;
+
+       /*
+        * First calculate the block statistics
+        */
+       for (blk = fs->super->s_first_data_block;
+            blk < fs->super->s_blocks_count; blk++) {
+               if (!ext2fs_fast_test_block_bitmap(fs->block_map, blk)) {
+                       group_free++;
+                       total_free++;
+               }
+               count++;
+               if ((count == fs->super->s_blocks_per_group) ||
+                   (blk == fs->super->s_blocks_count-1)) {
+                       fs->group_desc[group++].bg_free_blocks_count =
+                               group_free;
+                       count = 0;
+                       group_free = 0;
+               }
+       }
+       fs->super->s_free_blocks_count = total_free;
+
+       /*
+        * Next, calculate the inode statistics
+        */
+       group_free = 0;
+       total_free = 0;
+       count = 0;
+       group = 0;
+
+       /* Protect loop from wrap-around if s_inodes_count maxed */
+       for (ino = 1; ino <= fs->super->s_inodes_count && ino > 0; ino++) {
+               if (!ext2fs_fast_test_inode_bitmap(fs->inode_map, ino)) {
+                       group_free++;
+                       total_free++;
+               }
+               count++;
+               if ((count == fs->super->s_inodes_per_group) ||
+                   (ino == fs->super->s_inodes_count)) {
+                       fs->group_desc[group++].bg_free_inodes_count =
+                               group_free;
+                       count = 0;
+                       group_free = 0;
+               }
+       }
+       fs->super->s_free_inodes_count = total_free;
+       ext2fs_mark_super_dirty(fs);
+       return 0;
+}
+
+#define list_for_each_safe(pos, pnext, head) \
+       for (pos = (head)->next, pnext = pos->next; pos != (head); \
+            pos = pnext, pnext = pos->next)
+
+static void free_blk_move_list()
+{
+       struct list_head *entry, *tmp;
+       struct blk_move *bmv;
+
+       list_for_each_safe(entry, tmp, &blk_move_list) {
+
+               bmv = list_entry(entry, struct blk_move, list);
+               list_del(entry);
+               ext2fs_free_mem(&bmv);
+       }
+
+       return ;
+}
+static int resize_inode(ext2_filsys fs, unsigned long int new_inode_size)
+{
+       errcode_t retval;
+       int new_ino_blks_per_grp;
+       ext2fs_block_bitmap bmap;
+
+       if (new_inode_size <= EXT2_INODE_SIZE(fs->super)) {
+               fprintf(stderr, _("New inode size too small\n"));
+               return EXT2_ET_INVALID_ARGUMENT;
+       }
+
+       ext2fs_read_inode_bitmap(fs);
+       ext2fs_read_block_bitmap(fs);
+       INIT_LIST_HEAD(&blk_move_list);
+
+
+       new_ino_blks_per_grp = ext2fs_div_ceil(
+                                       EXT2_INODES_PER_GROUP(fs->super)*
+                                       new_inode_size,
+                                       fs->blocksize);
+
+       /* We may change the file system.
+        * Mark the file system as invalid so that
+        * the user is prompted to run fsck.
+        */
+       fs->super->s_state &= ~EXT2_VALID_FS;
+
+       retval = ext2fs_allocate_block_bitmap(fs, _("blocks to be moved"),
+                                               &bmap);
+       if (retval)
+               return retval;
+
+       retval = get_move_bitmap(fs, new_ino_blks_per_grp, bmap);
+       if (retval)
+               goto err_out;
+
+       retval = move_block(fs, bmap);
+       if (retval)
+               goto err_out;
+
+       retval = inode_scan_and_fix(fs);
+       if (retval)
+               goto err_out;
+
+       retval = expand_inode_table(fs, new_inode_size);
+       if (retval)
+               goto err_out;
+
+       ext2fs_calculate_summary_stats(fs);
+
+       fs->super->s_state |= EXT2_VALID_FS;
+       /* mark super block and block bitmap as dirty */
+       ext2fs_mark_super_dirty(fs);
+       ext2fs_mark_bb_dirty(fs);
+
+err_out:
+       free_blk_move_list();
+       ext2fs_free_block_bitmap(bmap);
+
+       return retval;
+}
+
+static int tune2fs_setup_tdb(const char *name, io_manager *io_ptr)
+{
+       errcode_t retval = 0;
+       char *tdb_dir, tdb_file[PATH_MAX];
+       char *device_name, *tmp_name;
+
+#if 0 /* FIXME!! */
+       /*
+        * Configuration via a conf file would be
+        * nice
+        */
+       profile_get_string(profile, "scratch_files",
+                                       "directory", 0, 0,
+                                       &tdb_dir);
+#endif
+       tmp_name = strdup(name);
+       device_name = basename(tmp_name);
+
+       tdb_dir = getenv("E2FSPROGS_UNDO_DIR");
+       if (!tdb_dir)
+               tdb_dir="/var/lib/e2fsprogs";
+
+       if (!strcmp(tdb_dir, "none") || (tdb_dir[0] == 0) ||
+           access(tdb_dir, W_OK))
+               return 0;
+
+       sprintf(tdb_file, "%s/tune2fs-%s.e2undo", tdb_dir, device_name);
+
+       if (!access(tdb_file, F_OK)) {
+               if (unlink(tdb_file) < 0) {
+                       retval = errno;
+                       com_err(program_name, retval,
+                               _("while trying to delete %s"),
+                               tdb_file);
+                       return retval;
+               }
+       }
+
+       set_undo_io_backing_manager(*io_ptr);
+       *io_ptr = undo_io_manager;
+       set_undo_io_backup_file(tdb_file);
+       printf(_("To undo the tune2fs operations please run "
+                "the command\n    undoe2fs %s %s\n\n"),
+                tdb_file, name);
+err_out:
+       free(tmp_name);
+       return retval;
+}
 
 int main (int argc, char ** argv)
 {
@@ -928,6 +1418,17 @@ int main (int argc, char ** argv)
 #else
        io_ptr = unix_io_manager;
 #endif
+
+       if (I_flag) {
+               /*
+                * If inode resize is requested use the
+                * Undo I/O manager
+                */
+               retval = tune2fs_setup_tdb(device_name, &io_ptr);
+               if (retval)
+                       exit(1);
+       }
+
        retval = ext2fs_open2(device_name, io_options, open_flag, 
                              0, 0, io_ptr, &fs);
         if (retval) {
@@ -1076,6 +1577,27 @@ int main (int argc, char ** argv)
                }
                ext2fs_mark_super_dirty(fs);
        }
+       if (I_flag) {
+               if (mount_flags & EXT2_MF_MOUNTED) {
+                       fputs(_("The inode size may only be "
+                               "changed when the filesystem is "
+                               "unmounted.\n"), stderr);
+                       exit(1);
+               }
+               /*
+                * We want to update group descriptor also
+                * with the new free inode count
+                */
+               fs->flags &= ~EXT2_FLAG_SUPER_ONLY;
+               if (resize_inode(fs, new_inode_size)) {
+                       fputs(_("Error in resizing the inode size.\n"
+                               "Run undoe2fs to undo the "
+                               "file system changes. \n"), stderr);
+               } else {
+                       printf (_("Setting inode size  %d\n"),
+                                                       new_inode_size);
+               }
+       }
 
        if (l_flag)
                list_super (sb);