Whamcloud - gitweb
LU-136 change "force_over_16tb" mount option to "force_over_128tb"
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext4-vmalloc-rhel5.patch
1 Index: linux-stage/fs/ext4/super.c
2 ===================================================================
3 --- linux-stage.orig/fs/ext4/super.c
4 +++ linux-stage/fs/ext4/super.c
5 @@ -662,7 +662,12 @@ static void ext4_put_super(struct super_
6  
7         for (i = 0; i < sbi->s_gdb_count; i++)
8                 brelse(sbi->s_group_desc[i]);
9 -       kfree(sbi->s_group_desc);
10 +
11 +       if (is_vmalloc_addr(sbi->s_group_desc))
12 +               vfree(sbi->s_group_desc);
13 +       else
14 +               kfree(sbi->s_group_desc);
15 +
16         if (is_vmalloc_addr(sbi->s_flex_groups))
17                 vfree(sbi->s_flex_groups);
18         else
19 @@ -2402,12 +2407,13 @@ static int ext4_fill_super(struct super_
20         unsigned long offset = 0;
21         unsigned long journal_devnum = 0;
22         unsigned long def_mount_opts;
23 -       struct inode *root;
24 +       struct inode *root = NULL;
25         char *cp;
26         const char *descr;
27         int ret = -EINVAL;
28         int blocksize;
29         unsigned int db_count;
30 +       size_t size;
31         unsigned int i;
32         int needs_recovery, has_huge_files;
33         __u64 blocks_count;
34 @@ -2718,10 +2724,16 @@ static int ext4_fill_super(struct super_
35                         (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
36         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
37                    EXT4_DESC_PER_BLOCK(sb);
38 -       sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
39 -                                   GFP_KERNEL);
40 +       size = (size_t) db_count * sizeof(struct buffer_head *);
41 +       sbi->s_group_desc = kzalloc(size, GFP_KERNEL);
42 +       if (sbi->s_group_desc == NULL) {
43 +               sbi->s_group_desc = vmalloc(size);
44 +               if (sbi->s_group_desc != NULL)
45 +                       memset(sbi->s_group_desc, 0, size);
46 +       }
47         if (sbi->s_group_desc == NULL) {
48 -               ext4_msg(sb, KERN_ERR, "not enough memory");
49 +               ext4_msg(sb, KERN_ERR, "not enough memory for %u groups (%u)\n",
50 +                       sbi->s_groups_count, (unsigned int) size);
51                 goto failed_mount;
52         }
53  
54 @@ -2907,17 +2919,16 @@ no_journal:
55         if (IS_ERR(root)) {
56                 ext4_msg(sb, KERN_ERR, "get root inode failed");
57                 ret = PTR_ERR(root);
58 +               root = NULL;
59                 goto failed_mount4;
60         }
61         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
62 -               iput(root);
63                 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
64                 goto failed_mount4;
65         }
66         sb->s_root = d_alloc_root(root);
67         if (!sb->s_root) {
68                 ext4_msg(sb, KERN_ERR, "get root dentry failed");
69 -               iput(root);
70                 ret = -ENOMEM;
71                 goto failed_mount4;
72         }
73 @@ -2968,6 +2979,7 @@ no_journal:
74         if (err) {
75                 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
76                          err);
77 +               ret = err;
78                 goto failed_mount4;
79         }
80  
81 @@ -3011,6 +3023,8 @@ cantfind_ext4:
82         goto failed_mount;
83  
84  failed_mount4:
85 +       iput(root);
86 +       sb->s_root = NULL;
87         ext4_msg(sb, KERN_ERR, "mount failed");
88         destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
89  failed_mount_wq:
90 @@ -3033,7 +3047,11 @@ failed_mount3:
91  failed_mount2:
92         for (i = 0; i < db_count; i++)
93                 brelse(sbi->s_group_desc[i]);
94 -       kfree(sbi->s_group_desc);
95 +
96 +       if (is_vmalloc_addr(sbi->s_group_desc))
97 +               vfree(sbi->s_group_desc);
98 +       else
99 +               kfree(sbi->s_group_desc);
100  failed_mount:
101         if (sbi->s_proc) {
102                 remove_proc_entry(sb->s_id, ext4_proc_root);
103 Index: linux-stage/fs/ext4/mballoc.c
104 ===================================================================
105 --- linux-stage.orig/fs/ext4/mballoc.c
106 +++ linux-stage/fs/ext4/mballoc.c
107 @@ -2607,10 +2607,21 @@ static int ext4_mb_init_backend(struct s
108         while (array_size < sizeof(*sbi->s_group_info) *
109                num_meta_group_infos_max)
110                 array_size = array_size << 1;
111 -       /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
112 -        * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
113 -        * So a two level scheme suffices for now. */
114 -       sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
115 +
116 +       /*
117 +        * A 16TB filesystem with 64-bit pointers requires an 8192 byte
118 +        * kmalloc(). Filesystems larger than 2^32 blocks (16TB normally)
119 +        * have group descriptors at least twice as large (64 bytes or
120 +        * more vs. 32 bytes for traditional ext3 filesystems), so a 128TB
121 +        * filesystem needs a 128kB allocation, which may need vmalloc().
122 +        */
123 +       sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
124 +       if (sbi->s_group_info == NULL) {
125 +               sbi->s_group_info = vmalloc(array_size);
126 +               if (sbi->s_group_info != NULL)
127 +                       memset(sbi->s_group_info, 0, array_size);
128 +       }
129 +
130         if (sbi->s_group_info == NULL) {
131                 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
132                 return -ENOMEM;
133 @@ -2620,6 +2631,11 @@ static int ext4_mb_init_backend(struct s
134                 printk(KERN_ERR "EXT4-fs: can't get new inode\n");
135                 goto err_freesgi;
136         }
137 +       /*
138 +        * To avoid colliding with a valid on-disk inode number,
139 +        * EXT4_BAD_INO is used here as the number of the buddy cache inode.
140 +        */
141 +       sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
142         EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
143         for (i = 0; i < ngroups; i++) {
144                 desc = ext4_get_group_desc(sb, i, NULL);
145 @@ -2642,7 +2658,10 @@ err_freebuddy:
146                 kfree(sbi->s_group_info[i]);
147         iput(sbi->s_buddy_cache);
148  err_freesgi:
149 -       kfree(sbi->s_group_info);
150 +       if (is_vmalloc_addr(sbi->s_group_info))
151 +               vfree(sbi->s_group_info);
152 +       else
153 +               kfree(sbi->s_group_info);
154         return -ENOMEM;
155  }
156  
157 @@ -2683,14 +2702,6 @@ int ext4_mb_init(struct super_block *sb,
158                 i++;
159         } while (i <= sb->s_blocksize_bits + 1);
160  
161 -       /* init file for buddy data */
162 -       ret = ext4_mb_init_backend(sb);
163 -       if (ret != 0) {
164 -               kfree(sbi->s_mb_offsets);
165 -               kfree(sbi->s_mb_maxs);
166 -               return ret;
167 -       }
168 -
169         spin_lock_init(&sbi->s_md_lock);
170         spin_lock_init(&sbi->s_bal_lock);
171  
172 @@ -2717,6 +2728,14 @@ int ext4_mb_init(struct super_block *sb,
173                 spin_lock_init(&lg->lg_prealloc_lock);
174         }
175  
176 +       /* init file for buddy data */
177 +       ret = ext4_mb_init_backend(sb);
178 +       if (ret != 0) {
179 +               kfree(sbi->s_mb_offsets);
180 +               kfree(sbi->s_mb_maxs);
181 +               return ret;
182 +       }
183 +
184         ext4_mb_history_init(sb);
185  
186         if (sbi->s_journal)
187 @@ -2766,7 +2785,10 @@ int ext4_mb_release(struct super_block *
188                         EXT4_DESC_PER_BLOCK_BITS(sb);
189                 for (i = 0; i < num_meta_group_infos; i++)
190                         kfree(sbi->s_group_info[i]);
191 -               kfree(sbi->s_group_info);
192 +               if (is_vmalloc_addr(sbi->s_group_info))
193 +                       vfree(sbi->s_group_info);
194 +               else
195 +                       kfree(sbi->s_group_info);
196         }
197         kfree(sbi->s_mb_offsets);
198         kfree(sbi->s_mb_maxs);