Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext3-mballoc3-sles10.patch
diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc3-sles10.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc3-sles10.patch
new file mode 100644 (file)
index 0000000..373f0c6
--- /dev/null
@@ -0,0 +1,377 @@
+Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs_i.h    2007-03-28 05:12:50.000000000 +0400
++++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h 2007-03-28 16:03:20.000000000 +0400
+@@ -135,6 +135,10 @@ struct ext3_inode_info {
+       struct inode vfs_inode;
+       __u32 i_cached_extent[4];
++
++      /* mballoc */
++      struct list_head i_prealloc_list;
++      spinlock_t i_prealloc_lock;
+ };
+ #endif        /* _LINUX_EXT3_FS_I */
+Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs_sb.h   2007-03-28 16:03:19.000000000 +0400
++++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs_sb.h        2007-03-28 16:03:20.000000000 +0400
+@@ -21,8 +21,15 @@
+ #include <linux/wait.h>
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #include <linux/rbtree.h>
++#include <linux/proc_fs.h>
++
++struct ext3_buddy_group_blocks;
++struct ext3_locality_group;
++struct ext3_mb_history;
++#define EXT3_BB_MAX_BLOCKS
+ /*
+  * third extended-fs super-block data in memory
+Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs.h      2007-03-28 16:03:19.000000000 +0400
++++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h   2007-03-28 16:03:20.000000000 +0400
+@@ -407,6 +407,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x800000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x1000000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x2000000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC            0x4000000/* Buddy allocation support */
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -767,8 +768,9 @@ struct dir_private_info {
+ extern int ext3_bg_has_super(struct super_block *sb, int group);
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
++extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+-                            unsigned long);
++                            unsigned long, int);
+ extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
+                                unsigned long, unsigned long, int *);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+Index: linux-2.6.16.27-0.9-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/fs/ext3/super.c      2007-03-28 16:03:19.000000000 +0400
++++ linux-2.6.16.27-0.9-full/fs/ext3/super.c   2007-03-28 16:03:20.000000000 +0400
+@@ -688,6 +688,7 @@ enum {
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
+       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_extents, Opt_noextents, Opt_extdebug,
++      Opt_mballoc, Opt_nomballoc, Opt_stripe,
+       Opt_grpquota
+ };
+@@ -743,6 +744,9 @@ static match_table_t tokens = {
+       {Opt_noextents, "noextents"},
+       {Opt_extdebug, "extdebug"},
+       {Opt_barrier, "barrier=%u"},
++      {Opt_mballoc, "mballoc"},
++      {Opt_nomballoc, "nomballoc"},
++      {Opt_stripe, "stripe=%u"},
+       {Opt_err, NULL},
+       {Opt_resize, "resize"},
+ };
+@@ -1092,6 +1096,19 @@ clear_qf_name:
+               case Opt_extdebug:
+                       set_opt (sbi->s_mount_opt, EXTDEBUG);
+                       break;
++              case Opt_mballoc:
++                      set_opt(sbi->s_mount_opt, MBALLOC);
++                      break;
++              case Opt_nomballoc:
++                      clear_opt(sbi->s_mount_opt, MBALLOC);
++                      break;
++              case Opt_stripe:
++                      if (match_int(&args[0], &option))
++                              return 0;
++                      if (option < 0)
++                              return 0;
++                      sbi->s_stripe = option;
++                      break;
+               default:
+                       printk (KERN_ERR
+                               "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1819,6 +1836,7 @@ static int ext3_fill_super (struct super
+               ext3_count_dirs(sb));
+       ext3_ext_init(sb);
++      ext3_mb_init(sb, needs_recovery);
+       lock_kernel();
+       return 0;
+Index: linux-2.6.16.27-0.9-full/fs/ext3/extents.c
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/fs/ext3/extents.c    2007-03-28 05:13:39.000000000 +0400
++++ linux-2.6.16.27-0.9-full/fs/ext3/extents.c 2007-03-28 16:03:20.000000000 +0400
+@@ -779,7 +779,7 @@ cleanup:
+               for (i = 0; i < depth; i++) {
+                       if (!ablocks[i])
+                               continue;
+-                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+               }
+       }
+       kfree(ablocks);
+@@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+                 path->p_idx->ei_leaf);
+       bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+       ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+-      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+       return err;
+ }
+@@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t
+       int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+       handle_t *handle = ext3_journal_start(tree->inode, needed);
+       struct buffer_head *bh;
+-      int i;
++      int i, metadata = 0;
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
++      if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode))
++              metadata = 1;
+       if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+               /* tail removal */
+               unsigned long num, start;
+@@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t
+                       bh = sb_find_get_block(tree->inode->i_sb, start + i);
+                       ext3_forget(handle, 0, tree->inode, bh, start + i);
+               }
+-              ext3_free_blocks(handle, tree->inode, start, num);
++              ext3_free_blocks(handle, tree->inode, start, num, metadata);
+       } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+               printk("strange request: removal %lu-%lu from %u:%u\n",
+                      from, to, ex->ee_block, ex->ee_len);
+@@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle,
+       struct ext3_extent *ex;
+       int goal, newblock, err = 0, depth;
+       struct ext3_extents_tree tree;
+-      unsigned long next;
+-      int allocated = 0;
+-
+-      /* until we have multiblock allocation */
+-      max_blocks = 1;
++      unsigned long allocated = 0;
++      struct ext3_allocation_request ar;
+       clear_buffer_new(bh_result);
+       ext3_init_tree_desc(&tree, inode);
+@@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle,
+               goto out2;
+       }
++      /* find neighbour allocated blocks */
++      ar.lleft = iblock;
++      err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft);
++      if (err)
++              goto out2;
++      ar.lright = iblock;
++      err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright);
++      if (err)
++              goto out2;
++
+       /* find next allocated block so that we know how many
+        * blocks we can allocate without ovelapping next extent */
+-      EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len);
+-      next = ext3_ext_next_allocated_block(path);
+-      EXT_ASSERT(next > iblock);
+-      allocated = next - iblock;
++      EXT_ASSERT(ar.pright == 0 || ar.lright > iblock);
++      if (ar.pright == 0)
++              allocated = EXT_MAX_BLOCK - iblock;
++      else
++              allocated = ar.lright - iblock;
+       if (allocated > max_blocks)
+               allocated = max_blocks;
+       /* allocate new block */
+-      goal = ext3_ext_find_goal(inode, path, iblock);
+-      newblock = ext3_new_block(handle, inode, goal, &err);
++      ar.inode = inode;
++      ar.goal = ext3_ext_find_goal(inode, path, iblock);
++      ar.logical = iblock;
++      ar.len = allocated;
++      ar.flags = EXT3_MB_HINT_DATA;
++      newblock = ext3_mb_new_blocks(handle, &ar, &err);
+       if (!newblock)
+               goto out2;
+       ext_debug(&tree, "allocate new block: goal %d, found %d\n",
+@@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle,
+       newex.ee_block = iblock;
+       newex.ee_start = newblock;
+       newex.ee_start_hi = 0;
+-      newex.ee_len = 1;
++      newex.ee_len = ar.len;
+       err = ext3_ext_insert_extent(handle, &tree, path, &newex);
+       if (err) {
+               /* free data blocks we just allocated */
+-              ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len);
++              /* not a good idea to call discard here directly,
++               * but otherwise we'd need to call it on every free() */
++              ext3_mb_discard_inode_preallocations(inode);
++              ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0);
+               goto out2;
+       }
+       
+@@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle,
+       /* previous routine could use block we allocated */
+       newblock = newex.ee_start;
++      allocated = newex.ee_len;
+       set_buffer_new(bh_result);
+       ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len,
+@@ -2339,6 +2357,9 @@ void ext3_ext_truncate(struct inode * in
+       down(&EXT3_I(inode)->truncate_sem);
+       ext3_ext_invalidate_cache(&tree);
++      /* it's important to discard preallocations under truncate_sem */
++      ext3_mb_discard_inode_preallocations(inode);
++
+       /* 
+        * TODO: optimization is possible here
+        * probably we need not scaning at all,
+Index: linux-2.6.16.27-0.9-full/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/fs/ext3/Makefile     2007-03-28 05:12:50.000000000 +0400
++++ linux-2.6.16.27-0.9-full/fs/ext3/Makefile  2007-03-28 16:03:20.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o
++         extents.o mballoc.o
+ ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.16.27-0.9-full/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/fs/ext3/xattr.c      2007-03-13 02:56:52.000000000 +0300
++++ linux-2.6.16.27-0.9-full/fs/ext3/xattr.c   2007-03-28 16:03:20.000000000 +0400
+@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
+               ea_bdebug(bh, "refcount now=0; freeing");
+               if (ce)
+                       mb_cache_entry_free(ce);
+-              ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
++              ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
+               get_bh(bh);
+               ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+       } else {
+@@ -804,7 +804,7 @@ inserted:
+                       new_bh = sb_getblk(sb, block);
+                       if (!new_bh) {
+ getblk_failed:
+-                              ext3_free_blocks(handle, inode, block, 1);
++                              ext3_free_blocks(handle, inode, block, 1, 1);
+                               error = -EIO;
+                               goto cleanup;
+                       }
+Index: linux-2.6.16.27-0.9-full/fs/ext3/balloc.c
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/fs/ext3/balloc.c     2007-03-13 02:56:52.000000000 +0300
++++ linux-2.6.16.27-0.9-full/fs/ext3/balloc.c  2007-03-28 16:03:20.000000000 +0400
+@@ -80,7 +80,7 @@ struct ext3_group_desc * ext3_get_group_
+  *
+  * Return buffer_head on success or NULL in case of failure.
+  */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+       struct ext3_group_desc * desc;
+@@ -296,6 +296,8 @@ void ext3_discard_reservation(struct ino
+       struct ext3_reserve_window_node *rsv;
+       spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
++      ext3_mb_discard_inode_preallocations(inode);
++
+       if (!block_i)
+               return;
+@@ -491,21 +493,25 @@ error_return:
+       return;
+ }
+-/* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
+-                      unsigned long block, unsigned long count)
++void ext3_free_blocks(handle_t *handle, struct inode * inode,
++              unsigned long block, unsigned long count, int metadata)
+ {
+-      struct super_block * sb;
+-      int dquot_freed_blocks;
++      struct super_block *sb;
++      int freed;
++
++      /* this isn't the right place to decide whether a block is metadata:
++       * inode.c/extents.c know better, but for safety force it here */
++      if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
++                      ext3_should_journal_data(inode))
++              metadata = 1;
+       sb = inode->i_sb;
+-      if (!sb) {
+-              printk ("ext3_free_blocks: nonexistent device");
+-              return;
+-      }
+-      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+-      if (dquot_freed_blocks)
+-              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
++              ext3_free_blocks_sb(handle, sb, block, count, &freed);
++      else
++              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
++      if (freed)
++              DQUOT_FREE_BLOCK(inode, freed);
+       return;
+ }
+@@ -1154,7 +1160,7 @@ int ext3_should_retry_alloc(struct super
+  * bitmap, and then for any free bit if that fails.
+  * This function also updates quota and i_blocks field.
+  */
+-int ext3_new_block(handle_t *handle, struct inode *inode,
++int ext3_new_block_old(handle_t *handle, struct inode *inode,
+                       unsigned long goal, int *errp)
+ {
+       struct buffer_head *bitmap_bh = NULL;
+Index: linux-2.6.16.27-0.9-full/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.16.27-0.9-full.orig/fs/ext3/inode.c      2007-03-28 05:13:38.000000000 +0400
++++ linux-2.6.16.27-0.9-full/fs/ext3/inode.c   2007-03-28 16:03:20.000000000 +0400
+@@ -568,7 +568,7 @@ static int ext3_alloc_branch(handle_t *h
+               ext3_journal_forget(handle, branch[i].bh);
+       }
+       for (i = 0; i < keys; i++)
+-              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0);
+       return err;
+ }
+@@ -1865,7 +1865,7 @@ ext3_clear_blocks(handle_t *handle, stru
+               }
+       }
+-      ext3_free_blocks(handle, inode, block_to_free, count);
++      ext3_free_blocks(handle, inode, block_to_free, count, 0);
+ }
+ /**
+@@ -2038,7 +2038,7 @@ static void ext3_free_branches(handle_t 
+                               ext3_journal_test_restart(handle, inode);
+                       }
+-                      ext3_free_blocks(handle, inode, nr, 1);
++                      ext3_free_blocks(handle, inode, nr, 1, 1);
+                       if (parent_bh) {
+                               /*