Whamcloud - gitweb
mke2fs: skip zeroing journal blocks
author Andreas Dilger <adilger@whamcloud.com>
Sat, 11 Jun 2011 16:17:29 +0000 (12:17 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Sat, 11 Jun 2011 16:19:12 +0000 (12:19 -0400)
Add the ability to skip zeroing journal blocks on disk.  This can
significantly speed up mke2fs with large journals.  At worst, the
uninitialized journal is only a very short-term risk (if it is a risk
at all): the journal will be overwritten on any new filesystem as
soon as any significant amount of data is written to disk, and, for
stale journal contents to cause a problem, the new journal TID would
need to match the offset/TID of an old commit block still left on disk.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
lib/ext2fs/ext2fs.h
lib/ext2fs/mkjournal.c
misc/mke2fs.8.in
misc/mke2fs.c

index c4d262f..ee7e3c2 100644 (file)
@@ -204,10 +204,9 @@ typedef struct ext2_file *ext2_file_t;
 
 /*
  * Flags for mkjournal
- *
- * EXT2_MKJOURNAL_V1_SUPER     Make a (deprecated) V1 journal superblock
  */
-#define EXT2_MKJOURNAL_V1_SUPER        0x0000001
+#define EXT2_MKJOURNAL_V1_SUPER        0x0000001 /* create V1 superblock (deprecated) */
+#define EXT2_MKJOURNAL_LAZYINIT        0x0000002 /* don't zero journal inode before use*/
 
 struct opaque_ext2_group_desc;
 
index e5060f4..5113ced 100644 (file)
@@ -104,7 +104,7 @@ static errcode_t write_journal_file(ext2_filsys fs, char *filename,
        /* Open the device or journal file */
        if ((fd = open(filename, O_WRONLY)) < 0) {
                retval = errno;
-               goto errout;
+               goto errfree;
        }
 
        /* Write the superblock out */
@@ -118,6 +118,9 @@ static errcode_t write_journal_file(ext2_filsys fs, char *filename,
                goto errout;
        memset(buf, 0, fs->blocksize);
 
+       if (flags & EXT2_MKJOURNAL_LAZYINIT)
+               goto success;
+
        for (i = 1; i < num_blocks; i++) {
                ret_size = write(fd, buf, fs->blocksize);
                if (ret_size < 0) {
@@ -127,10 +130,12 @@ static errcode_t write_journal_file(ext2_filsys fs, char *filename,
                if (ret_size != (int) fs->blocksize)
                        goto errout;
        }
-       close(fd);
 
+success:
        retval = 0;
 errout:
+       close(fd);
+errfree:
        ext2fs_free_mem(&buf);
        return retval;
 }
@@ -214,6 +219,7 @@ struct mkjournal_struct {
        blk64_t         goal;
        blk64_t         blk_to_zero;
        int             zero_count;
+       int             flags;
        char            *buf;
        errcode_t       err;
 };
@@ -251,7 +257,7 @@ static int mkjournal_proc(ext2_filsys       fs,
        retval = 0;
        if (blockcnt <= 0)
                retval = io_channel_write_blk64(fs->io, new_blk, 1, es->buf);
-       else {
+       else if (!(es->flags & EXT2_MKJOURNAL_LAZYINIT)) {
                if (es->zero_count) {
                        if ((es->blk_to_zero + es->zero_count == new_blk) &&
                            (es->zero_count < 1024))
@@ -317,6 +323,7 @@ static errcode_t write_journal_inode(ext2_filsys fs, ext2_ino_t journal_ino,
        es.newblocks = 0;
        es.buf = buf;
        es.err = 0;
+       es.flags = flags;
        es.zero_count = 0;
 
        if (fs->super->s_feature_incompat & EXT3_FEATURE_INCOMPAT_EXTENTS) {
@@ -519,6 +526,13 @@ errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t num_blocks, int flags)
                if ((fd = open(jfile, O_CREAT|O_WRONLY, 0600)) < 0)
                        return errno;
 
+               /* Note that we can't do lazy journal initialization for mounted
+                * filesystems, since the zero writing is also allocating the
+                * journal blocks.  We could use fallocate, but not all kernels
+                * support that, and creating a journal on a mounted ext2
+                * filesystem is extremely rare these days...  Ignore it. */
+               flags &= ~EXT2_MKJOURNAL_LAZYINIT;
+
                if ((retval = write_journal_file(fs, jfile, num_blocks, flags)))
                        goto errout;
 
index 4a3b0c3..0d4b046 100644 (file)
@@ -232,7 +232,15 @@ This speeds up filesystem
 initialization noticeably, but it requires the kernel to finish
 initializing the filesystem in the background when the filesystem is
 first mounted.  If the option value is omitted, it defaults to 1 to
-enable lazy inode table initialization.
+enable lazy inode table zeroing.
+.TP
+.B lazy_journal_init\fR[\fB= \fI<0 to disable, 1 to enable>\fR]
+If enabled, the journal inode will not be fully zeroed out by
+.BR mke2fs .
+This speeds up filesystem initialization noticeably, but carries some
+small risk if the system crashes before the journal has been overwritten
+entirely one time.  If the option value is omitted, it defaults to 1 to
+enable lazy journal inode zeroing.
 .TP
 .B test_fs
 Set a flag in the filesystem superblock indicating that it may be
index a246ec1..5ff3f9f 100644 (file)
@@ -503,6 +503,10 @@ static void create_journal_dev(ext2_filsys fs)
                        _("while initializing journal superblock"));
                exit(1);
        }
+
+       if (journal_flags & EXT2_MKJOURNAL_LAZYINIT)
+               goto write_superblock;
+
        ext2fs_numeric_progress_init(fs, &progress,
                                     _("Zeroing journal device: "),
                                     ext2fs_blocks_count(fs->super));
@@ -527,6 +531,8 @@ static void create_journal_dev(ext2_filsys fs)
        }
        ext2fs_zero_blocks2(0, 0, 0, 0, 0);
 
+       ext2fs_numeric_progress_close(fs, &progress, NULL);
+write_superblock:
        retval = io_channel_write_blk64(fs->io,
                                        fs->super->s_first_data_block+1,
                                        1, buf);
@@ -535,7 +541,6 @@ static void create_journal_dev(ext2_filsys fs)
                        _("while writing journal superblock"));
                exit(1);
        }
-       ext2fs_numeric_progress_close(fs, &progress, NULL);
 }
 
 static void show_stats(ext2_filsys fs)
@@ -762,6 +767,12 @@ static void parse_extended_opts(struct ext2_super_block *param,
                                lazy_itable_init = strtoul(arg, &p, 0);
                        else
                                lazy_itable_init = 1;
+               } else if (!strcmp(token, "lazy_journal_init")) {
+                       if (arg)
+                               journal_flags |= strtoul(arg, &p, 0) ?
+                                               EXT2_MKJOURNAL_LAZYINIT : 0;
+                       else
+                               journal_flags |= EXT2_MKJOURNAL_LAZYINIT;
                } else if (!strcmp(token, "discard")) {
                        discard = 1;
                } else if (!strcmp(token, "nodiscard")) {
@@ -781,6 +792,7 @@ static void parse_extended_opts(struct ext2_super_block *param,
                        "\tstripe-width=<RAID stride * data disks in blocks>\n"
                        "\tresize=<resize maximum size in blocks>\n"
                        "\tlazy_itable_init=<0 to disable, 1 to enable>\n"
+                       "\tlazy_journal_init=<0 to disable, 1 to enable>\n"
                        "\ttest_fs\n"
                        "\tdiscard\n"
                        "\tnodiscard\n\n"),
@@ -1821,6 +1833,9 @@ profile_error:
                                                 "lazy_itable_init",
                                                 lazy_itable_init);
        discard = get_bool_from_profile(fs_types, "discard" , discard);
+       journal_flags |= get_bool_from_profile(fs_types,
+                                              "lazy_journal_init", 0) ?
+                                              EXT2_MKJOURNAL_LAZYINIT : 0;
 
        /* Get options from profile */
        for (cpp = fs_types; *cpp; cpp++) {