Whamcloud - gitweb
LU-11310 ldiskfs: Support for SUSE 15 GA and SP1
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / suse15 / ext4-large-dir.patch
1 Subject: [PATCH] ext4: add largedir feature
2
3 This INCOMPAT_LARGEDIR feature allows larger directories to be created
4 in ldiskfs, both with directory sizes over 2GB and a maximum htree
5 depth of 3 instead of the current limit of 2. These features are needed
6 in order to exceed the current limit of approximately 10M entries in a
7 single directory.
8
9 This patch was originally written by Yang Sheng to support the Lustre server.
10
11 [ Bumped the credits needed to update an indexed directory -- tytso ]
12
13 Signed-off-by: Liang Zhen <liang.zhen@intel.com>
14 Signed-off-by: Yang Sheng <yang.sheng@intel.com>
15 Signed-off-by: Artem Blagodarenko <artem.blagodarenko@seagate.com>
16 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
17 Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
18 ---
19  fs/ext4/ext4.h      |  23 +++++++--
20  fs/ext4/ext4_jbd2.h |   9 +++-
21  fs/ext4/inode.c     |   4 +-
22  fs/ext4/namei.c     | 120 ++++++++++++++++++++++++++++++--------------
23  4 files changed, 111 insertions(+), 45 deletions(-)
24
25 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
26 index 0999eff..ca73d33 100644
27 --- a/fs/ext4/ext4.h
28 +++ b/fs/ext4/ext4.h
29 @@ -1815,7 +1815,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt,              ENCRYPT)
30                                          EXT4_FEATURE_INCOMPAT_DIRDATA| \
31                                          EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
32                                          EXT4_FEATURE_INCOMPAT_ENCRYPT | \
33 -                                        EXT4_FEATURE_INCOMPAT_CSUM_SEED)
34 +                                        EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
35 +                                        EXT4_FEATURE_INCOMPAT_LARGEDIR)
36  #define EXT4_FEATURE_RO_COMPAT_SUPP    (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
37                                          EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
38                                          EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
39 @@ -2200,6 +2201,16 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
40   */
41  #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
42  
43 +/* htree levels for ext4 */
44 +#define        EXT4_HTREE_LEVEL_COMPAT 2
45 +#define        EXT4_HTREE_LEVEL        3
46 +
47 +static inline int ext4_dir_htree_level(struct super_block *sb)
48 +{
49 +       return ext4_has_feature_largedir(sb) ?
50 +               EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
51 +}
52 +
53  /*
54   * Timeout and state flag for lazy initialization inode thread.
55   */
56 @@ -2848,13 +2859,15 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
57         es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
58  }
59  
60 -static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
61 +static inline loff_t ext4_isize(struct super_block *sb,
62 +                               struct ext4_inode *raw_inode)
63  {
64 -       if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
65 +       if (ext4_has_feature_largedir(sb) ||
66 +           S_ISREG(le16_to_cpu(raw_inode->i_mode)))
67                 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
68                         le32_to_cpu(raw_inode->i_size_lo);
69 -       else
70 -               return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
71 +
72 +       return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
73  }
74  
75  static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
76 diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
77 index 4b7cc1a..a0ea2d6 100644
78 --- a/fs/ext4/ext4_jbd2.h
79 +++ b/fs/ext4/ext4_jbd2.h
80 @@ -77,7 +77,14 @@
81  
82  #define EXT4_RESERVE_TRANS_BLOCKS      12U
83  
84 -#define EXT4_INDEX_EXTRA_TRANS_BLOCKS  8
85 +/*
86 + * Number of credits needed if we need to insert an entry into a
87 + * directory.  For each new index block, we need 4 blocks (old index
88 + * block, new index block, bitmap block, bg summary).  For normal
89 + * htree directories there are 2 levels; if the largedir feature
90 + * is enabled it's 3 levels.
91 + */
92 +#define EXT4_INDEX_EXTRA_TRANS_BLOCKS  12U
93  
94  #ifdef CONFIG_QUOTA
95  /* Amount of blocks needed for quota update - we know that the structure was
96 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
97 index 462988c..19f38c4 100644
98 --- a/fs/ext4/inode.c
99 +++ b/fs/ext4/inode.c
100 @@ -4817,7 +4817,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
101         if (ext4_has_feature_64bit(sb))
102                 ei->i_file_acl |=
103                         ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
104 -       inode->i_size = ext4_isize(raw_inode);
105 +       inode->i_size = ext4_isize(sb, raw_inode);
106         if ((size = i_size_read(inode)) < 0) {
107                 EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
108                 ret = -EFSCORRUPTED;
109 @@ -5145,7 +5145,7 @@ static int ext4_do_update_inode(handle_t *handle,
110                 raw_inode->i_file_acl_high =
111                         cpu_to_le16(ei->i_file_acl >> 32);
112         raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
113 -       if (ei->i_disksize != ext4_isize(raw_inode)) {
114 +       if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
115                 ext4_isize_set(raw_inode, ei->i_disksize);
116                 need_datasync = 1;
117         }
118 diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
119 index c585762..e7fb642 100644
120 --- a/fs/ext4/namei.c
121 +++ b/fs/ext4/namei.c
122 @@ -520,7 +520,7 @@ struct dx_root_info *dx_get_dx_info(struct ext4_dir_entry_2 *de)
123  
124  static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
125  {
126 -       return le32_to_cpu(entry->block) & 0x00ffffff;
127 +       return le32_to_cpu(entry->block) & 0x0fffffff;
128  }
129  
130  static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
131 @@ -752,6 +752,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
132         struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
133         u32 hash;
134  
135 +       memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
136         frame->bh = ext4_read_dirblock(dir, 0, INDEX);
137         if (IS_ERR(frame->bh))
138                 return (struct dx_frame *) frame->bh;
139 @@ -781,9 +782,15 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
140         }
141  
142         indirect = info->indirect_levels;
143 -       if (indirect > 1) {
144 -               ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
145 -                                  info->indirect_levels);
146 +       if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
147 +               ext4_warning(dir->i_sb,
148 +                            "Directory (ino: %lu) htree depth %#06x exceed"
149 +                            "supported value", dir->i_ino,
150 +                            ext4_dir_htree_level(dir->i_sb));
151 +               if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
152 +                       ext4_warning(dir->i_sb, "Enable large directory "
153 +                                               "feature to access it");
154 +               }
155                 goto fail;
156         }
157  
158 @@ -874,12 +881,20 @@ fail:
159  
160  static void dx_release(struct dx_frame *frames)
161  {
162 +       struct dx_root_info *info;
163 +       int i;
164 +
165         if (frames[0].bh == NULL)
166                 return;
167  
168 -       if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
169 -               brelse(frames[1].bh);
170 -       brelse(frames[0].bh);
171 +       for (i = 0, info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data);
172 +            i <= info->indirect_levels;
173 +            i++) {
174 +               if (frames[i].bh == NULL)
175 +                       break;
176 +               brelse(frames[i].bh);
177 +               frames[i].bh = NULL;
178 +       }
179  }
180  
181  /*
182 @@ -1065,7 +1080,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
183  {
184         struct dx_hash_info hinfo;
185         struct ext4_dir_entry_2 *de;
186 -       struct dx_frame frames[2], *frame;
187 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
188         struct inode *dir;
189         ext4_lblk_t block;
190         int count = 0;
191 @@ -1505,7 +1520,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
192                         struct ext4_dir_entry_2 **res_dir)
193  {
194         struct super_block * sb = dir->i_sb;
195 -       struct dx_frame frames[2], *frame;
196 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
197         struct buffer_head *bh;
198         ext4_lblk_t block;
199         int retval;
200 @@ -1985,7 +2000,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
201                             struct inode *inode, struct buffer_head *bh)
202  {
203         struct buffer_head *bh2;
204 -       struct dx_frame frames[2], *frame;
205 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
206         struct dx_entry *entries;
207         struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
208         struct ext4_dir_entry_tail *t;
209 @@ -2295,13 +2310,16 @@ out:
210  static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
211                              struct inode *dir, struct inode *inode)
212  {
213 -       struct dx_frame frames[2], *frame;
214 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
215         struct dx_entry *entries, *at;
216         struct buffer_head *bh;
217         struct super_block *sb = dir->i_sb;
218         struct ext4_dir_entry_2 *de;
219 +       int restart;
220         int err;
221  
222 +again:
223 +       restart = 0;
224         frame = dx_probe(fname, dir, NULL, frames);
225         if (IS_ERR(frame))
226                 return PTR_ERR(frame);
227 @@ -2323,24 +2341,44 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
228         if (err != -ENOSPC)
229                 goto cleanup;
230  
231 +       err = 0;
232         /* Block full, should compress but for now just split */
233         dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
234                        dx_get_count(entries), dx_get_limit(entries)));
235         /* Need to split index? */
236         if (dx_get_count(entries) == dx_get_limit(entries)) {
237                 ext4_lblk_t newblock;
238 -               unsigned icount = dx_get_count(entries);
239 -               int levels = frame - frames;
240 +               int levels = frame - frames + 1;
241 +               unsigned int icount;
242 +               int add_level = 1;
243                 struct dx_entry *entries2;
244                 struct dx_node *node2;
245                 struct buffer_head *bh2;
246  
247 -               if (levels && (dx_get_count(frames->entries) ==
248 -                              dx_get_limit(frames->entries))) {
249 -                       ext4_warning_inode(dir, "Directory index full!");
250 +               while (frame > frames) {
251 +                       if (dx_get_count((frame - 1)->entries) <
252 +                           dx_get_limit((frame - 1)->entries)) {
253 +                               add_level = 0;
254 +                               break;
255 +                       }
256 +                       frame--; /* split higher index block */
257 +                       at = frame->at;
258 +                       entries = frame->entries;
259 +                       restart = 1;
260 +               }
261 +               if (add_level && levels == ext4_dir_htree_level(sb)) {
262 +                       ext4_warning(sb, "Directory (ino: %lu) index full, "
263 +                                        "reach max htree level :%d",
264 +                                        dir->i_ino, levels);
265 +                       if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
266 +                               ext4_warning(sb, "Large directory feature is "
267 +                                                "not enabled on this "
268 +                                                "filesystem");
269 +                       }
270                         err = -ENOSPC;
271                         goto cleanup;
272                 }
273 +               icount = dx_get_count(entries);
274                 bh2 = ext4_append(handle, dir, &newblock);
275                 if (IS_ERR(bh2)) {
276                         err = PTR_ERR(bh2);
277 @@ -2355,7 +2393,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
278                 err = ext4_journal_get_write_access(handle, frame->bh);
279                 if (err)
280                         goto journal_error;
281 -               if (levels) {
282 +               if (!add_level) {
283                         unsigned icount1 = icount/2, icount2 = icount - icount1;
284                         unsigned hash2 = dx_get_hash(entries + icount1);
285                         dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
286 @@ -2363,7 +2401,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
287  
288                         BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
289                         err = ext4_journal_get_write_access(handle,
290 -                                                            frames[0].bh);
291 +                                                            (frame - 1)->bh);
292                         if (err)
293                                 goto journal_error;
294  
295 @@ -2379,18 +2417,26 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
296                                 frame->entries = entries = entries2;
297                                 swap(frame->bh, bh2);
298                         }
299 -                       dx_insert_block(frames + 0, hash2, newblock);
300 -                       dxtrace(dx_show_index("node", frames[1].entries));
301 +                       dx_insert_block((frame - 1), hash2, newblock);
302 +                       dxtrace(dx_show_index("node", frame->entries));
303                         dxtrace(dx_show_index("node",
304                                ((struct dx_node *) bh2->b_data)->entries));
305                         err = ext4_handle_dirty_dx_node(handle, dir, bh2);
306                         if (err)
307                                 goto journal_error;
308                         brelse (bh2);
309 +                       err = ext4_handle_dirty_dx_node(handle, dir,
310 +                                                  (frame - 1)->bh);
311 +                       if (err)
312 +                               goto journal_error;
313 +                       if (restart) {
314 +                               err = ext4_handle_dirty_dx_node(handle, dir,
315 +                                                          frame->bh);
316 +                               goto journal_error;
317 +                       }
318                 } else {
319                         struct dx_root_info *info;
320 -                       dxtrace(printk(KERN_DEBUG
321 -                                      "Creating second level index...\n"));
322 +
323                         memcpy((char *) entries2, (char *) entries,
324                                icount * sizeof(struct dx_entry));
325                         dx_set_limit(entries2, dx_node_limit(dir));
326 @@ -2400,22 +2446,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
327                         dx_set_block(entries + 0, newblock);
328                         info = dx_get_dx_info((struct ext4_dir_entry_2 *)
329                                               frames[0].bh->b_data);
330 -                       info->indirect_levels = 1;
331 -
332 -                       /* Add new access path frame */
333 -                       frame = frames + 1;
334 -                       frame->at = at = at - entries + entries2;
335 -                       frame->entries = entries = entries2;
336 -                       frame->bh = bh2;
337 -                       err = ext4_journal_get_write_access(handle,
338 -                                                            frame->bh);
339 +                       info->indirect_levels += 1;
340 +                       dxtrace(printk(KERN_DEBUG
341 +                                      "Creating %d level index...\n",
342 +                                      info->indirect_levels));
343 +                       err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
344                         if (err)
345                                 goto journal_error;
346 -               }
347 -               err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
348 -               if (err) {
349 -                       ext4_std_error(inode->i_sb, err);
350 -                       goto cleanup;
351 +                       err = ext4_handle_dirty_dx_node(handle, dir, bh2);
352 +                       brelse(bh2);
353 +                       restart = 1;
354 +                       goto journal_error;
355                 }
356         }
357         de = do_split(handle, dir, &bh, frame, &fname->hinfo);
358 @@ -2427,10 +2468,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
359         goto cleanup;
360  
361  journal_error:
362 -       ext4_std_error(dir->i_sb, err);
363 +       ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
364  cleanup:
365         brelse(bh);
366         dx_release(frames);
367 +       /* If @restart is set, the htree path has changed, so we need to
368 +        * repeat dx_probe() to find a valid htree path
369 +        */
370 +       if (restart && err == 0)
371 +               goto again;
372         return err;
373  }
374  
375 -- 
376 2.20.1
377