Whamcloud - gitweb
359bee06aeba8bbdd6964fb4a2a0e535c7504903
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel6.3 / ext4-vmalloc.patch
1 Index: linux-stage/fs/ext4/super.c
2 ===================================================================
3 --- linux-stage.orig/fs/ext4/super.c
4 +++ linux-stage/fs/ext4/super.c
5 @@ -675,7 +675,12 @@ static void ext4_put_super(struct super_
6  
7         for (i = 0; i < sbi->s_gdb_count; i++)
8                 brelse(sbi->s_group_desc[i]);
9 -       kfree(sbi->s_group_desc);
10 +
11 +       if (is_vmalloc_addr(sbi->s_group_desc))
12 +               vfree(sbi->s_group_desc);
13 +       else
14 +               kfree(sbi->s_group_desc);
15 +
16         if (is_vmalloc_addr(sbi->s_flex_groups))
17                 vfree(sbi->s_flex_groups);
18         else
19 @@ -2519,12 +2524,13 @@ static int ext4_fill_super(struct super_
20         unsigned long offset = 0;
21         unsigned long journal_devnum = 0;
22         unsigned long def_mount_opts;
23 -       struct inode *root;
24 +       struct inode *root = NULL;
25         char *cp;
26         const char *descr;
27         int ret = -EINVAL;
28         int blocksize;
29         unsigned int db_count;
30 +       size_t size;
31         unsigned int i;
32         int needs_recovery, has_huge_files;
33         __u64 blocks_count;
34 @@ -2850,11 +2856,18 @@ static int ext4_fill_super(struct super_
35                         (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
36         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
37                    EXT4_DESC_PER_BLOCK(sb);
38 -       sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
39 -                                   GFP_KERNEL);
40 +       size = (size_t)db_count * sizeof(struct buffer_head *);
41 +       sbi->s_group_desc = kzalloc(size, GFP_KERNEL);
42         if (sbi->s_group_desc == NULL) {
43 -               ext4_msg(sb, KERN_ERR, "not enough memory");
44 -               goto failed_mount;
45 +               sbi->s_group_desc = vmalloc(size);
46 +               if (sbi->s_group_desc != NULL) {
47 +                       memset(sbi->s_group_desc, 0, size);
48 +               } else {
49 +                       ext4_msg(sb, KERN_ERR, "no memory for %u groups (%u)\n",
50 +                                sbi->s_groups_count, (unsigned int)size);
51 +                       ret = -ENOMEM;
52 +                       goto failed_mount;
53 +               }
54         }
55  
56  #ifdef __BIG_ENDIAN
57 @@ -3064,17 +3077,16 @@ no_journal:
58         if (IS_ERR(root)) {
59                 ext4_msg(sb, KERN_ERR, "get root inode failed");
60                 ret = PTR_ERR(root);
61 +               root = NULL;
62                 goto failed_mount4;
63         }
64         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
65 -               iput(root);
66                 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
67                 goto failed_mount4;
68         }
69         sb->s_root = d_alloc_root(root);
70         if (!sb->s_root) {
71                 ext4_msg(sb, KERN_ERR, "get root dentry failed");
72 -               iput(root);
73                 ret = -ENOMEM;
74                 goto failed_mount4;
75         }
76 @@ -3125,6 +3137,7 @@ no_journal:
77         if (err) {
78                 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
79                          err);
80 +               ret = err;
81                 goto failed_mount4;
82         }
83  
84 @@ -3166,6 +3179,8 @@ cantfind_ext4:
85         goto failed_mount;
86  
87  failed_mount4:
88 +       iput(root);
89 +       sb->s_root = NULL;
90         ext4_msg(sb, KERN_ERR, "mount failed");
91         destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
92  failed_mount_wq:
93 @@ -3190,7 +3205,11 @@ failed_mount3:
94  failed_mount2:
95         for (i = 0; i < db_count; i++)
96                 brelse(sbi->s_group_desc[i]);
97 -       kfree(sbi->s_group_desc);
98 +
99 +       if (is_vmalloc_addr(sbi->s_group_desc))
100 +               vfree(sbi->s_group_desc);
101 +       else
102 +               kfree(sbi->s_group_desc);
103  failed_mount:
104         if (sbi->s_proc) {
105                 remove_proc_entry(sb->s_id, ext4_proc_root);
106 Index: linux-stage/fs/ext4/mballoc.c
107 ===================================================================
108 --- linux-stage.orig/fs/ext4/mballoc.c
109 +++ linux-stage/fs/ext4/mballoc.c
110 @@ -23,6 +23,7 @@
111  
112  #include "mballoc.h"
113  #include <linux/debugfs.h>
114 +#include <linux/vmalloc.h>
115  #include <trace/events/ext4.h>
116  
117  /*
118 @@ -2408,24 +2409,37 @@ static int ext4_mb_init_backend(struct s
119         while (array_size < sizeof(*sbi->s_group_info) *
120                num_meta_group_infos_max)
121                 array_size = array_size << 1;
122 -       /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
123 -        * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
124 -        * So a two level scheme suffices for now. */
125 -       sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
126 +       /* A 16TB filesystem with 64-bit pointers requires an 8192 byte
127 +        * kmalloc(). Filesystems larger than 2^32 blocks (16TB normally)
128 +        * have group descriptors at least twice as large (64 bytes or
129 +        * more vs. 32 bytes for traditional ext3 filesystems), so a 128TB
130 +        * filesystem needs a 128kB allocation, which may need vmalloc(). */
131 +       sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
132         if (sbi->s_group_info == NULL) {
133 -               printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
134 -               return -ENOMEM;
135 +               sbi->s_group_info = vmalloc(array_size);
136 +               if (sbi->s_group_info != NULL) {
137 +                       memset(sbi->s_group_info, 0, array_size);
138 +               } else {
139 +                       ext4_msg(sb, KERN_ERR, "no memory for groupinfo (%u)\n",
140 +                                array_size);
141 +                       return -ENOMEM;
142 +               }
143         }
144         sbi->s_buddy_cache = new_inode(sb);
145         if (sbi->s_buddy_cache == NULL) {
146 -               printk(KERN_ERR "EXT4-fs: can't get new inode\n");
147 +               ext4_msg(sb, KERN_ERR, "can't get new inode\n");
148                 goto err_freesgi;
149         }
150 +       /* To avoid potentially colliding with a valid on-disk inode number,
151 +        * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
152 +        * not in the inode hash, so it should never be found by iget(), but
153 +        * this will avoid confusion if it ever shows up during debugging. */
154 +       sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
155         EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
156         for (i = 0; i < ngroups; i++) {
157                 desc = ext4_get_group_desc(sb, i, NULL);
158                 if (desc == NULL) {
159 -                       printk(KERN_ERR
160 +                       ext4_msg(sb, KERN_ERR,
161                                 "EXT4-fs: can't read descriptor %u\n", i);
162                         goto err_freebuddy;
163                 }
164 @@ -2461,7 +2474,10 @@ err_freebuddy:
165                 kfree(sbi->s_group_info[i]);
166         iput(sbi->s_buddy_cache);
167  err_freesgi:
168 -       kfree(sbi->s_group_info);
169 +       if (is_vmalloc_addr(sbi->s_group_info))
170 +               vfree(sbi->s_group_info);
171 +       else
172 +               kfree(sbi->s_group_info);
173         return -ENOMEM;
174  }
175  
176 @@ -2639,7 +2647,10 @@ int ext4_mb_release(struct super_block *
177                         EXT4_DESC_PER_BLOCK_BITS(sb);
178                 for (i = 0; i < num_meta_group_infos; i++)
179                         kfree(sbi->s_group_info[i]);
180 -               kfree(sbi->s_group_info);
181 +               if (is_vmalloc_addr(sbi->s_group_info))
182 +                       vfree(sbi->s_group_info);
183 +               else
184 +                       kfree(sbi->s_group_info);
185         }
186         kfree(sbi->s_mb_prealloc_table);
187         kfree(sbi->s_mb_offsets);