Whamcloud - gitweb
libext2fs: use fallocate for creating journals and hugefiles
[tools/e2fsprogs.git] / lib / ext2fs / mkjournal.c
index 6afbbde..80a1021 100644 (file)
@@ -4,11 +4,12 @@
  * Copyright (C) 2000 Theodore Ts'o.
  *
  * %Begin-Header%
- * This file may be redistributed under the terms of the GNU Public
- * License.
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
  * %End-Header%
  */
 
+#include "config.h"
 #include <stdio.h>
 #include <string.h>
 #if HAVE_UNISTD_H
 #include "ext2_fs.h"
 #include "e2p/e2p.h"
 #include "ext2fs.h"
-#include "jfs_user.h"
+
+#include "kernel-jbd.h"
 
 /*
  * This function automatically sets up the journal superblock and
  * returns it as an allocated block.
  */
 errcode_t ext2fs_create_journal_superblock(ext2_filsys fs,
-                                          __u32 size, int flags,
+                                          __u32 num_blocks, int flags,
                                           char  **ret_jsb)
 {
        errcode_t               retval;
        journal_superblock_t    *jsb;
 
-       if (size < 1024)
+       if (num_blocks < JFS_MIN_JOURNAL_BLOCKS)
                return EXT2_ET_JOURNAL_TOO_SMALL;
 
        if ((retval = ext2fs_get_mem(fs->blocksize, &jsb)))
@@ -62,7 +64,7 @@ errcode_t ext2fs_create_journal_superblock(ext2_filsys fs,
        else
                jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
        jsb->s_blocksize = htonl(fs->blocksize);
-       jsb->s_maxlen = htonl(size);
+       jsb->s_maxlen = htonl(num_blocks);
        jsb->s_nr_users = htonl(1);
        jsb->s_first = htonl(1);
        jsb->s_sequence = htonl(1);
@@ -74,10 +76,7 @@ errcode_t ext2fs_create_journal_superblock(ext2_filsys fs,
        if (fs->super->s_feature_incompat &
            EXT3_FEATURE_INCOMPAT_JOURNAL_DEV) {
                jsb->s_nr_users = 0;
-               if (fs->blocksize == 1024)
-                       jsb->s_first = htonl(3);
-               else
-                       jsb->s_first = htonl(2);
+               jsb->s_first = htonl(ext2fs_journal_sb_start(fs->blocksize) + 1);
        }
 
        *ret_jsb = (char *) jsb;
@@ -90,20 +89,21 @@ errcode_t ext2fs_create_journal_superblock(ext2_filsys fs,
  * filesystems.
  */
 static errcode_t write_journal_file(ext2_filsys fs, char *filename,
-                                   blk_t size, int flags)
+                                   blk_t num_blocks, int flags)
 {
        errcode_t       retval;
        char            *buf = 0;
        int             fd, ret_size;
        blk_t           i;
 
-       if ((retval = ext2fs_create_journal_superblock(fs, size, flags, &buf)))
+       if ((retval = ext2fs_create_journal_superblock(fs, num_blocks, flags,
+                                                      &buf)))
                return retval;
 
        /* Open the device or journal file */
        if ((fd = open(filename, O_WRONLY)) < 0) {
                retval = errno;
-               goto errout;
+               goto errfree;
        }
 
        /* Write the superblock out */
@@ -117,7 +117,10 @@ static errcode_t write_journal_file(ext2_filsys fs, char *filename,
                goto errout;
        memset(buf, 0, fs->blocksize);
 
-       for (i = 1; i < size; i++) {
+       if (flags & EXT2_MKJOURNAL_LAZYINIT)
+               goto success;
+
+       for (i = 1; i < num_blocks; i++) {
                ret_size = write(fd, buf, fs->blocksize);
                if (ret_size < 0) {
                        retval = errno;
@@ -126,10 +129,12 @@ static errcode_t write_journal_file(ext2_filsys fs, char *filename,
                if (ret_size != (int) fs->blocksize)
                        goto errout;
        }
-       close(fd);
 
+success:
        retval = 0;
 errout:
+       close(fd);
+errfree:
        ext2fs_free_mem(&buf);
        return retval;
 }
@@ -144,12 +149,13 @@ errout:
  * attempt to free the static zeroizing buffer.  (This is to keep
  * programs that check for memory leaks happy.)
  */
-#define STRIDE_LENGTH 8
+#define MAX_STRIDE_LENGTH (4194304 / (int) fs->blocksize)
 errcode_t ext2fs_zero_blocks2(ext2_filsys fs, blk64_t blk, int num,
                              blk64_t *ret_blk, int *ret_count)
 {
        int             j, count;
-       static char     *buf;
+       static void     *buf;
+       static int      stride_length;
        errcode_t       retval;
 
        /* If fs is null, clean up the static buffer and return */
@@ -160,24 +166,41 @@ errcode_t ext2fs_zero_blocks2(ext2_filsys fs, blk64_t blk, int num,
                }
                return 0;
        }
+
+       /* Deal with zeroing less than 1 block */
+       if (num <= 0)
+               return 0;
+
+       /* Try a zero out command, if supported */
+       retval = io_channel_zeroout(fs->io, blk, num);
+       if (retval == 0)
+               return 0;
+
        /* Allocate the zeroizing buffer if necessary */
-       if (!buf) {
-               buf = malloc(fs->blocksize * STRIDE_LENGTH);
-               if (!buf)
-                       return ENOMEM;
-               memset(buf, 0, fs->blocksize * STRIDE_LENGTH);
+       if (num > stride_length && stride_length < MAX_STRIDE_LENGTH) {
+               void *p;
+               int new_stride = num;
+
+               if (new_stride > MAX_STRIDE_LENGTH)
+                       new_stride = MAX_STRIDE_LENGTH;
+               p = realloc(buf, fs->blocksize * new_stride);
+               if (!p)
+                       return EXT2_ET_NO_MEMORY;
+               buf = p;
+               stride_length = new_stride;
+               memset(buf, 0, fs->blocksize * stride_length);
        }
        /* OK, do the write loop */
        j=0;
        while (j < num) {
-               if (blk % STRIDE_LENGTH) {
-                       count = STRIDE_LENGTH - (blk % STRIDE_LENGTH);
+               if (blk % stride_length) {
+                       count = stride_length - (blk % stride_length);
                        if (count > (num - j))
                                count = num - j;
                } else {
                        count = num - j;
-                       if (count > STRIDE_LENGTH)
-                               count = STRIDE_LENGTH;
+                       if (count > stride_length)
+                               count = stride_length;
                }
                retval = io_channel_write_blk64(fs->io, blk, count, buf);
                if (retval) {
@@ -205,122 +228,14 @@ errcode_t ext2fs_zero_blocks(ext2_filsys fs, blk_t blk, int num,
 }
 
 /*
- * Helper function for creating the journal using direct I/O routines
+ * Calculate the initial goal block to be roughly at the middle of the
+ * filesystem.  Pick a group that has the largest number of free
+ * blocks.
  */
-struct mkjournal_struct {
-       int             num_blocks;
-       int             newblocks;
-       blk_t           goal;
-       blk_t           blk_to_zero;
-       int             zero_count;
-       char            *buf;
-       errcode_t       err;
-};
-
-static int mkjournal_proc(ext2_filsys  fs,
-                          blk_t        *blocknr,
-                          e2_blkcnt_t  blockcnt,
-                          blk_t        ref_block EXT2FS_ATTR((unused)),
-                          int          ref_offset EXT2FS_ATTR((unused)),
-                          void         *priv_data)
+static blk64_t get_midpoint_journal_block(ext2_filsys fs)
 {
-       struct mkjournal_struct *es = (struct mkjournal_struct *) priv_data;
-       blk_t   new_blk;
-       errcode_t       retval;
-
-       if (*blocknr) {
-               es->goal = *blocknr;
-               return 0;
-       }
-       retval = ext2fs_new_block(fs, es->goal, 0, &new_blk);
-       if (retval) {
-               es->err = retval;
-               return BLOCK_ABORT;
-       }
-       if (blockcnt >= 0)
-               es->num_blocks--;
-
-       es->newblocks++;
-       retval = 0;
-       if (blockcnt <= 0)
-               retval = io_channel_write_blk64(fs->io, new_blk, 1, es->buf);
-       else {
-               if (es->zero_count) {
-                       if ((es->blk_to_zero + es->zero_count == new_blk) &&
-                           (es->zero_count < 1024))
-                               es->zero_count++;
-                       else {
-                               retval = ext2fs_zero_blocks(fs,
-                                                           es->blk_to_zero,
-                                                           es->zero_count,
-                                                           0, 0);
-                               es->zero_count = 0;
-                       }
-               }
-               if (es->zero_count == 0) {
-                       es->blk_to_zero = new_blk;
-                       es->zero_count = 1;
-               }
-       }
-
-       if (blockcnt == 0)
-               memset(es->buf, 0, fs->blocksize);
-
-       if (retval) {
-               es->err = retval;
-               return BLOCK_ABORT;
-       }
-       *blocknr = es->goal = new_blk;
-       ext2fs_block_alloc_stats2(fs, new_blk, +1);
+       dgrp_t  group, start, end, i, log_flex;
 
-       if (es->num_blocks == 0)
-               return (BLOCK_CHANGED | BLOCK_ABORT);
-       else
-               return BLOCK_CHANGED;
-
-}
-
-/*
- * This function creates a journal using direct I/O routines.
- */
-static errcode_t write_journal_inode(ext2_filsys fs, ext2_ino_t journal_ino,
-                                    blk_t size, int flags)
-{
-       char                    *buf;
-       dgrp_t                  group, start, end, i, log_flex;
-       errcode_t               retval;
-       struct ext2_inode       inode;
-       struct mkjournal_struct es;
-
-       if ((retval = ext2fs_create_journal_superblock(fs, size, flags, &buf)))
-               return retval;
-
-       if ((retval = ext2fs_read_bitmaps(fs)))
-               return retval;
-
-       if ((retval = ext2fs_read_inode(fs, journal_ino, &inode)))
-               return retval;
-
-       if (inode.i_blocks > 0)
-               return EEXIST;
-
-       es.num_blocks = size;
-       es.newblocks = 0;
-       es.buf = buf;
-       es.err = 0;
-       es.zero_count = 0;
-
-       if (fs->super->s_feature_incompat & EXT3_FEATURE_INCOMPAT_EXTENTS) {
-               inode.i_flags |= EXT4_EXTENTS_FL;
-               if ((retval = ext2fs_write_inode(fs, journal_ino, &inode)))
-                       return retval;
-       }
-
-       /*
-        * Set the initial goal block to be roughly at the middle of
-        * the filesystem.  Pick a group that has the largest number
-        * of free blocks.
-        */
        group = ext2fs_group_of_blk2(fs, (ext2fs_blocks_count(fs->super) -
                                         fs->super->s_first_data_block) / 2);
        log_flex = 1 << fs->super->s_log_groups_per_flex;
@@ -336,46 +251,81 @@ static errcode_t write_journal_inode(ext2_filsys fs, ext2_ino_t journal_ino,
                start = (group > 0) ? group-1 : group;
        end = ((group+1) < fs->group_desc_count) ? group+1 : group;
        group = start;
-       for (i=start+1; i <= end; i++)
+       for (i = start + 1; i <= end; i++)
                if (ext2fs_bg_free_blocks_count(fs, i) >
                    ext2fs_bg_free_blocks_count(fs, group))
                        group = i;
+       return ext2fs_group_first_block2(fs, group);
+}
+
+/*
+ * This function creates a journal using direct I/O routines.
+ */
+static errcode_t write_journal_inode(ext2_filsys fs, ext2_ino_t journal_ino,
+                                    blk_t num_blocks, blk64_t goal, int flags)
+{
+       char                    *buf;
+       errcode_t               retval;
+       struct ext2_inode       inode;
+       unsigned long long      inode_size;
+       int                     falloc_flags = EXT2_FALLOCATE_FORCE_INIT;
+       blk64_t                 zblk;
 
-       es.goal = (fs->super->s_blocks_per_group * group) +
-               fs->super->s_first_data_block;
+       if ((retval = ext2fs_create_journal_superblock(fs, num_blocks, flags,
+                                                      &buf)))
+               return retval;
 
-       retval = ext2fs_block_iterate2(fs, journal_ino, BLOCK_FLAG_APPEND,
-                                      0, mkjournal_proc, &es);
-       if (es.err) {
-               retval = es.err;
-               goto errout;
-       }
-       if (es.zero_count) {
-               retval = ext2fs_zero_blocks(fs, es.blk_to_zero,
-                                           es.zero_count, 0, 0);
-               if (retval)
-                       goto errout;
-       }
+       if ((retval = ext2fs_read_bitmaps(fs)))
+               goto out2;
 
        if ((retval = ext2fs_read_inode(fs, journal_ino, &inode)))
-               goto errout;
+               goto out2;
+
+       if (inode.i_blocks > 0) {
+               retval = EEXIST;
+               goto out2;
+       }
 
-       inode.i_size += fs->blocksize * size;
-       ext2fs_iblk_add_blocks(fs, &inode, es.newblocks);
+       if (goal == ~0ULL)
+               goal = get_midpoint_journal_block(fs);
+
+       if (fs->super->s_feature_incompat & EXT3_FEATURE_INCOMPAT_EXTENTS)
+               inode.i_flags |= EXT4_EXTENTS_FL;
+
+       if (!(flags & EXT2_MKJOURNAL_LAZYINIT))
+               falloc_flags |= EXT2_FALLOCATE_ZERO_BLOCKS;
+
+       inode_size = (unsigned long long)fs->blocksize * num_blocks;
        inode.i_mtime = inode.i_ctime = fs->now ? fs->now : time(0);
        inode.i_links_count = 1;
        inode.i_mode = LINUX_S_IFREG | 0600;
+       retval = ext2fs_inode_size_set(fs, &inode, inode_size);
+       if (retval)
+               goto out2;
+
+       retval = ext2fs_fallocate(fs, falloc_flags, journal_ino,
+                                 &inode, goal, 0, num_blocks);
+       if (retval)
+               goto out2;
 
        if ((retval = ext2fs_write_new_inode(fs, journal_ino, &inode)))
-               goto errout;
-       retval = 0;
+               goto out2;
+
+       retval = ext2fs_bmap2(fs, journal_ino, &inode, NULL, 0, 0, NULL, &zblk);
+       if (retval)
+               goto out2;
+
+       retval = io_channel_write_blk64(fs->io, zblk, 1, buf);
+       if (retval)
+               goto out2;
 
        memcpy(fs->super->s_jnl_blocks, inode.i_block, EXT2_N_BLOCKS*4);
+       fs->super->s_jnl_blocks[15] = inode.i_size_high;
        fs->super->s_jnl_blocks[16] = inode.i_size;
        fs->super->s_jnl_backup_type = EXT3_JNL_BACKUP_BLOCKS;
        ext2fs_mark_super_dirty(fs);
 
-errout:
+out2:
        ext2fs_free_mem(&buf);
        return retval;
 }
@@ -385,21 +335,28 @@ errout:
  * in the filesystem.  For very small filesystems, it is not reasonable to
  * have a journal that fills more than half of the filesystem.
  */
-int ext2fs_default_journal_size(__u64 blocks)
+int ext2fs_default_journal_size(__u64 num_blocks)
 {
-       if (blocks < 2048)
+       if (num_blocks < 2048)
                return -1;
-       if (blocks < 32768)
+       if (num_blocks < 32768)
                return (1024);
-       if (blocks < 256*1024)
+       if (num_blocks < 256*1024)
                return (4096);
-       if (blocks < 512*1024)
+       if (num_blocks < 512*1024)
                return (8192);
-       if (blocks < 1024*1024)
+       if (num_blocks < 1024*1024)
                return (16384);
        return 32768;
 }
 
+int ext2fs_journal_sb_start(int blocksize)
+{
+       if (blocksize == EXT2_MIN_BLOCK_SIZE)
+               return 2;
+       return 1;
+}
+
 /*
  * This function adds a journal device to a filesystem
  */
@@ -407,7 +364,7 @@ errcode_t ext2fs_add_journal_device(ext2_filsys fs, ext2_filsys journal_dev)
 {
        struct stat     st;
        errcode_t       retval;
-       char            buf[1024];
+       char            buf[SUPERBLOCK_SIZE];
        journal_superblock_t    *jsb;
        int             start;
        __u32           i, nr_users;
@@ -420,10 +377,9 @@ errcode_t ext2fs_add_journal_device(ext2_filsys fs, ext2_filsys journal_dev)
                return EXT2_ET_JOURNAL_NOT_BLOCK; /* Must be a block device */
 
        /* Get the journal superblock */
-       start = 1;
-       if (journal_dev->blocksize == 1024)
-               start++;
-       if ((retval = io_channel_read_blk64(journal_dev->io, start, -1024,
+       start = ext2fs_journal_sb_start(journal_dev->blocksize);
+       if ((retval = io_channel_read_blk64(journal_dev->io, start,
+                                           -SUPERBLOCK_SIZE,
                                            buf)))
                return retval;
 
@@ -449,13 +405,15 @@ errcode_t ext2fs_add_journal_device(ext2_filsys fs, ext2_filsys journal_dev)
        }
 
        /* Writeback the journal superblock */
-       if ((retval = io_channel_write_blk64(journal_dev->io, start, -1024, buf)))
+       if ((retval = io_channel_write_blk64(journal_dev->io, start,
+                                           -SUPERBLOCK_SIZE, buf)))
                return retval;
 
        fs->super->s_journal_inum = 0;
        fs->super->s_journal_dev = st.st_rdev;
        memcpy(fs->super->s_journal_uuid, jsb->s_uuid,
               sizeof(fs->super->s_journal_uuid));
+       memset(fs->super->s_jnl_blocks, 0, sizeof(fs->super->s_jnl_blocks));
        fs->super->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL;
        ext2fs_mark_super_dirty(fs);
        return 0;
@@ -466,20 +424,27 @@ errcode_t ext2fs_add_journal_device(ext2_filsys fs, ext2_filsys journal_dev)
  * POSIX routines if the filesystem is mounted, or using direct I/O
  * functions if it is not.
  */
-errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t size, int flags)
+errcode_t ext2fs_add_journal_inode2(ext2_filsys fs, blk_t num_blocks,
+                                   blk64_t goal, int flags)
 {
        errcode_t               retval;
        ext2_ino_t              journal_ino;
        struct stat             st;
        char                    jfile[1024];
-       int                     mount_flags, f;
+       int                     mount_flags;
        int                     fd = -1;
 
-       if ((retval = ext2fs_check_mount_point(fs->device_name, &mount_flags,
-                                              jfile, sizeof(jfile)-10)))
+       if (flags & EXT2_MKJOURNAL_NO_MNT_CHECK)
+               mount_flags = 0;
+       else if ((retval = ext2fs_check_mount_point(fs->device_name,
+                                                   &mount_flags,
+                                                   jfile, sizeof(jfile)-10)))
                return retval;
 
        if (mount_flags & EXT2_MF_MOUNTED) {
+#if HAVE_EXT2_IOCTLS
+               int f = 0;
+#endif
                strcat(jfile, "/.journal");
 
                /*
@@ -492,9 +457,10 @@ errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t size, int flags)
 #if HAVE_EXT2_IOCTLS
                fd = open(jfile, O_RDONLY);
                if (fd >= 0) {
-                       f = 0;
-                       ioctl(fd, EXT2_IOC_SETFLAGS, &f);
+                       retval = ioctl(fd, EXT2_IOC_SETFLAGS, &f);
                        close(fd);
+                       if (retval)
+                               return retval;
                }
 #endif
 #endif
@@ -503,7 +469,14 @@ errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t size, int flags)
                if ((fd = open(jfile, O_CREAT|O_WRONLY, 0600)) < 0)
                        return errno;
 
-               if ((retval = write_journal_file(fs, jfile, size, flags)))
+               /* Note that we can't do lazy journal initialization for mounted
+                * filesystems, since the zero writing is also allocating the
+                * journal blocks.  We could use fallocate, but not all kernels
+                * support that, and creating a journal on a mounted ext2
+                * filesystem is extremely rare these days...  Ignore it. */
+               flags &= ~EXT2_MKJOURNAL_LAZYINIT;
+
+               if ((retval = write_journal_file(fs, jfile, num_blocks, flags)))
                        goto errout;
 
                /* Get inode number of the journal file */
@@ -535,6 +508,8 @@ errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t size, int flags)
                        goto errout;
                }
                journal_ino = st.st_ino;
+               memset(fs->super->s_jnl_blocks, 0,
+                      sizeof(fs->super->s_jnl_blocks));
        } else {
                if ((mount_flags & EXT2_MF_BUSY) &&
                    !(fs->flags & EXT2_FLAG_EXCLUSIVE)) {
@@ -543,7 +518,7 @@ errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t size, int flags)
                }
                journal_ino = EXT2_JOURNAL_INO;
                if ((retval = write_journal_inode(fs, journal_ino,
-                                                 size, flags)))
+                                                 num_blocks, goal, flags)))
                        return retval;
        }
 
@@ -556,17 +531,23 @@ errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t size, int flags)
        ext2fs_mark_super_dirty(fs);
        return 0;
 errout:
-       if (fd > 0)
+       if (fd >= 0)
                close(fd);
        return retval;
 }
 
+errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t num_blocks, int flags)
+{
+       return ext2fs_add_journal_inode2(fs, num_blocks, ~0ULL, flags);
+}
+
+
 #ifdef DEBUG
 main(int argc, char **argv)
 {
        errcode_t       retval;
        char            *device_name;
-       ext2_filsys     fs;
+       ext2_filsys     fs;
 
        if (argc < 2) {
                fprintf(stderr, "Usage: %s filesystem\n", argv[0]);
@@ -581,7 +562,7 @@ main(int argc, char **argv)
                exit(1);
        }
 
-       retval = ext2fs_add_journal_inode(fs, 1024);
+       retval = ext2fs_add_journal_inode(fs, JFS_MIN_JOURNAL_BLOCKS, 0);
        if (retval) {
                com_err(argv[0], retval, "while adding journal to %s",
                        device_name);
@@ -591,7 +572,7 @@ main(int argc, char **argv)
        if (retval) {
                printf("Warning, had trouble writing out superblocks.\n");
        }
-       ext2fs_close(fs);
+       ext2fs_close_free(&fs);
        exit(0);
 
 }