Whamcloud - gitweb
b=21137 ext4 extent allocation is slower than in ext3
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext4-prealloc-rhel5.patch
index 34d0472..d2272bb 100644 (file)
@@ -6,16 +6,16 @@ Index: linux-2.6.18-128.1.6/fs/ext4/ext4_sb.h
  
        /* tunables */
        unsigned long s_stripe;
--      unsigned long s_mb_stream_request;
+-      unsigned int s_mb_stream_request;
 +      unsigned long s_mb_small_req;
 +      unsigned long s_mb_large_req;
-       unsigned long s_mb_max_to_scan;
-       unsigned long s_mb_min_to_scan;
-       unsigned long s_mb_stats;
-       unsigned long s_mb_order2_reqs;
+       unsigned int s_mb_max_to_scan;
+       unsigned int s_mb_min_to_scan;
+       unsigned int s_mb_stats;
+       unsigned int s_mb_order2_reqs;
 +      unsigned long *s_mb_prealloc_table;
 +      unsigned long s_mb_prealloc_table_size;
-       unsigned long s_mb_group_prealloc;
+       unsigned int s_mb_group_prealloc;
        /* where last allocation was done - for stream allocation */
        unsigned long s_mb_last_group;
 Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
@@ -58,7 +58,7 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
  int ext4_mb_init(struct super_block *sb, int needs_recovery)
  {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-@@ -2542,15 +2562,59 @@
+@@ -2542,13 +2562,57 @@
        sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
        sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
        sbi->s_mb_stats = MB_DEFAULT_STATS;
@@ -68,11 +68,10 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
 -      sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
 +
 +      if (sbi->s_stripe == 0) {
-+              sbi->s_mb_prealloc_table_size = 8;
++              sbi->s_mb_prealloc_table_size = 10;
 +              i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long);
 +              sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS);
 +              if (sbi->s_mb_prealloc_table == NULL) {
-+                              clear_opt(sbi->s_mount_opt, MBALLOC);
 +                              kfree(sbi->s_mb_offsets);
 +                              kfree(sbi->s_mb_maxs);
 +                              return -ENOMEM;
@@ -87,6 +86,8 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
 +              ext4_mb_prealloc_table_add(sbi, 128);
 +              ext4_mb_prealloc_table_add(sbi, 256);
 +              ext4_mb_prealloc_table_add(sbi, 512);
++              ext4_mb_prealloc_table_add(sbi, 1024);
++              ext4_mb_prealloc_table_add(sbi, 2048);
 +
 +              sbi->s_mb_small_req = 256;
 +              sbi->s_mb_large_req = 1024;
@@ -96,7 +97,6 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
 +              i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long);
 +              sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS);
 +              if (sbi->s_mb_prealloc_table == NULL) {
-+                      clear_opt(sbi->s_mount_opt, MBALLOC);
 +                      kfree(sbi->s_mb_offsets);
 +                      kfree(sbi->s_mb_maxs);
 +                      return -ENOMEM;
@@ -112,15 +112,13 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
 +              sbi->s_mb_group_prealloc = sbi->s_stripe * 4;
 +      }
  
-       i = sizeof(struct ext4_locality_group) * num_possible_cpus();
-       sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
+       sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
        if (sbi->s_locality_groups == NULL) {
-               clear_opt(sbi->s_mount_opt, MBALLOC);
 +              kfree(sbi->s_mb_prealloc_table);
                kfree(sbi->s_mb_offsets);
                kfree(sbi->s_mb_maxs);
                return -ENOMEM;
-@@ -2725,10 +2789,82 @@
+@@ -2725,8 +2789,82 @@
  #define EXT4_MB_MAX_TO_SCAN_NAME      "max_to_scan"
  #define EXT4_MB_MIN_TO_SCAN_NAME      "min_to_scan"
  #define EXT4_MB_ORDER2_REQ            "order2_req"
@@ -129,7 +127,7 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
 +#define EXT4_MB_LARGE_REQ             "large_req"
 +#define EXT4_MB_PREALLOC_TABLE          "prealloc_table"
  #define EXT4_MB_GROUP_PREALLOC                "group_prealloc"
++
 +static int ext4_mb_prealloc_table_proc_read(char *page, char **start, off_t off,
 +                                          int count, int *eof, void *data)
 +{
@@ -182,7 +180,7 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
 +              prev = value;
 +              num++;
 +      }
++
 +      new_table = kmalloc(num * sizeof(*new_table), GFP_KERNEL);
 +      if (new_table == NULL)
 +              return -ENOMEM;
@@ -204,35 +202,25 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
  
  #define MB_PROC_FOPS(name)                                    \
  static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v)    \
-@@ -2774,7 +2910,8 @@
- MB_PROC_FOPS(max_to_scan);
- MB_PROC_FOPS(min_to_scan);
- MB_PROC_FOPS(order2_reqs);
--MB_PROC_FOPS(stream_request);
-+MB_PROC_FOPS(small_req);
-+MB_PROC_FOPS(large_req);
- MB_PROC_FOPS(group_prealloc);
- #define       MB_PROC_HANDLER(name, var)                                      \
 @@ -2795,6 +2932,7 @@
        mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct proc_dir_entry *proc;
 +      struct proc_dir_entry *proc_entry;
-       char devname[64];
-       if (proc_root_ext4 == NULL) {
-@@ -2808,15 +2946,29 @@
-       MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
-       MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
-       MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
--      MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
-+      MB_PROC_HANDLER(EXT4_MB_SMALL_REQ, small_req);
-+      MB_PROC_HANDLER(EXT4_MB_LARGE_REQ, large_req);
-       MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
  
+       if (sbi->s_proc == NULL)
+               return -EINVAL;
+@@ -2808,13 +2946,28 @@
+       EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
+       EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
+       EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
+-      EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
++      EXT4_PROC_HANDLER(EXT4_MB_SMALL_REQ, mb_small_req);
++      EXT4_PROC_HANDLER(EXT4_MB_LARGE_REQ, mb_large_req);
+       EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
++
 +      proc_entry = create_proc_entry(EXT4_MB_PREALLOC_TABLE, S_IFREG |
-+                                     S_IRUGO | S_IWUSR, sbi->s_mb_proc);
++                                     S_IRUGO | S_IWUSR, sbi->s_proc);
 +      if (proc_entry == NULL) {
 +              printk(KERN_ERR "EXT4-fs: unable to create %s\n",
 +                     EXT4_MB_PREALLOC_TABLE);
@@ -245,26 +233,25 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
        return 0;
  
  err_out:
-       printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
-       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
--      remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
-+      remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_mb_proc);
-+      remove_proc_entry(EXT4_MB_LARGE_REQ, sbi->s_mb_proc);
-+      remove_proc_entry(EXT4_MB_SMALL_REQ, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
+       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
+-      remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
++      remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_proc);
++      remove_proc_entry(EXT4_MB_LARGE_REQ, sbi->s_proc);
++      remove_proc_entry(EXT4_MB_SMALL_REQ, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
 @@ -2838,7 +2990,9 @@
  
-       bdevname(sb->s_bdev, devname);
-       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
--      remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
-+      remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_mb_proc);
-+      remove_proc_entry(EXT4_MB_LARGE_REQ, sbi->s_mb_proc);
-+      remove_proc_entry(EXT4_MB_SMALL_REQ, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
+       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
+-      remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
++      remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_proc);
++      remove_proc_entry(EXT4_MB_LARGE_REQ, sbi->s_proc);
++      remove_proc_entry(EXT4_MB_SMALL_REQ, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
 @@ -3032,11 +3186,12 @@
  ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                                struct ext4_allocation_request *ar)
@@ -358,8 +345,8 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
 @@ -3185,7 +3326,6 @@
        }
        BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
-                       start > ac->ac_o_ex.fe_logical);
--      BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+                       start > ac->ac_o_ex.fe_logical);
+-      BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
  
        /* now prepare goal request */
  
@@ -403,3 +390,26 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
        BUG_ON(ac->ac_lg != NULL);
        /*
         * locality group prealloc space are per cpu. The reason for having
+Index: linux-2.6.27.21-0.1/fs/ext4/inode.c
+===================================================================
+--- linux-2.6.27.21-0.1.orig/fs/ext4/inode.c   2009-05-28 11:12:42.000000000 +0530
++++ linux-2.6.27.21-0.1/fs/ext4/inode.c        2009-05-28 11:16:48.000000000 +0530
+@@ -2442,14 +2442,14 @@
+               return -EROFS;
+       /*
+-       * Make sure nr_to_write is >= sbi->s_mb_stream_request
++       * Make sure nr_to_write is >= sbi->s_mb_small_req
+        * This make sure small files blocks are allocated in
+        * single attempt. This ensure that small files
+        * get less fragmented.
+        */
+-      if (wbc->nr_to_write < sbi->s_mb_stream_request) {
+-              nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
+-              wbc->nr_to_write = sbi->s_mb_stream_request;
++      if (wbc->nr_to_write < sbi->s_mb_small_req) {
++              nr_to_writebump = sbi->s_mb_small_req - wbc->nr_to_write;
++              wbc->nr_to_write = sbi->s_mb_small_req;
+       }
+       if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+               range_whole = 1;