1 Index: linux-stage/fs/ext4/super.c
2 ===================================================================
3 --- linux-stage.orig/fs/ext4/super.c
4 +++ linux-stage/fs/ext4/super.c
5 @@ -675,7 +675,12 @@ static void ext4_put_super(struct super_
7 for (i = 0; i < sbi->s_gdb_count; i++)
8 brelse(sbi->s_group_desc[i]);
9 - kfree(sbi->s_group_desc);
11 + if (is_vmalloc_addr(sbi->s_group_desc))
12 + vfree(sbi->s_group_desc);
14 + kfree(sbi->s_group_desc);
16 if (is_vmalloc_addr(sbi->s_flex_groups))
17 vfree(sbi->s_flex_groups);
19 @@ -2519,12 +2524,13 @@ static int ext4_fill_super(struct super_
20 unsigned long offset = 0;
21 unsigned long journal_devnum = 0;
22 unsigned long def_mount_opts;
24 + struct inode *root = NULL;
29 unsigned int db_count;
32 int needs_recovery, has_huge_files;
34 @@ -2850,11 +2856,18 @@ static int ext4_fill_super(struct super_
35 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
36 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
37 EXT4_DESC_PER_BLOCK(sb);
38 - sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
40 + size = (size_t)db_count * sizeof(struct buffer_head *);
41 + sbi->s_group_desc = kzalloc(size, GFP_KERNEL);
42 if (sbi->s_group_desc == NULL) {
43 - ext4_msg(sb, KERN_ERR, "not enough memory");
45 + sbi->s_group_desc = vmalloc(size);
46 + if (sbi->s_group_desc != NULL) {
47 + memset(sbi->s_group_desc, 0, size);
49 + ext4_msg(sb, KERN_ERR, "no memory for %u groups (%u)\n",
50 + sbi->s_groups_count, (unsigned int)size);
57 @@ -3064,17 +3077,16 @@ no_journal:
59 ext4_msg(sb, KERN_ERR, "get root inode failed");
64 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
66 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
69 sb->s_root = d_alloc_root(root);
71 ext4_msg(sb, KERN_ERR, "get root dentry failed");
76 @@ -3125,6 +3137,7 @@ no_journal:
78 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
84 @@ -3166,6 +3179,8 @@ cantfind_ext4:
90 ext4_msg(sb, KERN_ERR, "mount failed");
91 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
93 @@ -3190,7 +3205,11 @@ failed_mount3:
95 for (i = 0; i < db_count; i++)
96 brelse(sbi->s_group_desc[i]);
97 - kfree(sbi->s_group_desc);
99 + if (is_vmalloc_addr(sbi->s_group_desc))
100 + vfree(sbi->s_group_desc);
102 + kfree(sbi->s_group_desc);
105 remove_proc_entry(sb->s_id, ext4_proc_root);
106 Index: linux-stage/fs/ext4/mballoc.c
107 ===================================================================
108 --- linux-stage.orig/fs/ext4/mballoc.c
109 +++ linux-stage/fs/ext4/mballoc.c
113 #include <linux/debugfs.h>
114 +#include <linux/vmalloc.h>
115 #include <trace/events/ext4.h>
118 @@ -2408,24 +2409,37 @@ static int ext4_mb_init_backend(struct s
119 while (array_size < sizeof(*sbi->s_group_info) *
120 num_meta_group_infos_max)
121 array_size = array_size << 1;
122 - /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
123 - * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
124 - * So a two level scheme suffices for now. */
125 - sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
126 + /* A 16TB filesystem with 64-bit pointers requires an 8192 byte
127 + * kmalloc(). Filesystems larger than 2^32 blocks (16TB normally)
128 + * have group descriptors at least twice as large (64 bytes or
129 + * more vs. 32 bytes for traditional ext3 filesystems), so a 128TB
130 + * filesystem needs a 128kB allocation, which may need vmalloc(). */
131 + sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
132 if (sbi->s_group_info == NULL) {
133 - printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
135 + sbi->s_group_info = vmalloc(array_size);
136 + if (sbi->s_group_info != NULL) {
137 + memset(sbi->s_group_info, 0, array_size);
139 + ext4_msg(sb, KERN_ERR, "no memory for groupinfo (%u)\n",
144 sbi->s_buddy_cache = new_inode(sb);
145 if (sbi->s_buddy_cache == NULL) {
146 - printk(KERN_ERR "EXT4-fs: can't get new inode\n");
147 + ext4_msg(sb, KERN_ERR, "can't get new inode\n");
150 + /* To avoid potentially colliding with a valid on-disk inode number,
151 + * use EXT4_BAD_INO for the buddy cache inode number. This inode is
152 + * not in the inode hash, so it should never be found by iget(), but
153 + * this will avoid confusion if it ever shows up during debugging. */
154 + sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
155 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
156 for (i = 0; i < ngroups; i++) {
157 desc = ext4_get_group_desc(sb, i, NULL);
160 + ext4_msg(sb, KERN_ERR,
161 "EXT4-fs: can't read descriptor %u\n", i);
164 @@ -2461,7 +2474,10 @@ err_freebuddy:
165 kfree(sbi->s_group_info[i]);
166 iput(sbi->s_buddy_cache);
168 - kfree(sbi->s_group_info);
169 + if (is_vmalloc_addr(sbi->s_group_info))
170 + vfree(sbi->s_group_info);
172 + kfree(sbi->s_group_info);
176 @@ -2502,14 +2518,6 @@ int ext4_mb_init(struct super_block *sb,
178 } while (i <= sb->s_blocksize_bits + 1);
180 - /* init file for buddy data */
181 - ret = ext4_mb_init_backend(sb);
183 - kfree(sbi->s_mb_offsets);
184 - kfree(sbi->s_mb_maxs);
188 spin_lock_init(&sbi->s_md_lock);
189 spin_lock_init(&sbi->s_bal_lock);
191 @@ -2579,6 +2587,15 @@ int ext4_mb_init(struct super_block *sb,
192 spin_lock_init(&lg->lg_prealloc_lock);
195 + /* init file for buddy data */
196 + ret = ext4_mb_init_backend(sb);
198 + kfree(sbi->s_mb_prealloc_table);
199 + kfree(sbi->s_mb_offsets);
200 + kfree(sbi->s_mb_maxs);
205 struct proc_dir_entry *p;
206 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
207 @@ -2639,7 +2656,10 @@ int ext4_mb_release(struct super_block *
208 EXT4_DESC_PER_BLOCK_BITS(sb);
209 for (i = 0; i < num_meta_group_infos; i++)
210 kfree(sbi->s_group_info[i]);
211 - kfree(sbi->s_group_info);
212 + if (is_vmalloc_addr(sbi->s_group_info))
213 + vfree(sbi->s_group_info);
215 + kfree(sbi->s_group_info);
217 kfree(sbi->s_mb_offsets);
218 kfree(sbi->s_mb_maxs);