Whamcloud - gitweb
LU-11922 ldiskfs: make dirdata work with metadata_csum
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / sles12sp2 / ext4-large-dir.patch
1 This INCOMPAT_LARGEDIR feature allows larger directories
2 to be created in ldiskfs, both with directory sizes over
3 2GB and a maximum htree depth of 3 instead of the
4 current limit of 2. These features are needed in order
5 to exceed the current limit of approximately 10M entries
6 in a single directory.
7
8 Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
9 ===================================================================
10 --- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h
11 +++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
12 @@ -1585,6 +1585,7 @@ static inline void ext4_clear_state_flag
13                                          EXT4_FEATURE_INCOMPAT_MMP |    \
14                                          EXT4_FEATURE_INCOMPAT_DIRDATA| \
15                                          EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
16 +                                        EXT4_FEATURE_INCOMPAT_LARGEDIR | \
17                                          EXT4_FEATURE_INCOMPAT_ENCRYPT | \
18                                          EXT4_FEATURE_INCOMPAT_CSUM_SEED)
19  #define EXT4_FEATURE_RO_COMPAT_SUPP    (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
20 @@ -1999,6 +2000,9 @@ struct mmpd_data {
21  # define NORET_TYPE    /**/
22  # define ATTRIB_NORET  __attribute__((noreturn))
23  # define NORET_AND     noreturn,
24 +/* htree levels for ext4 */
25 +#define EXT4_HTREE_LEVEL_COMPAT 2
26 +#define EXT4_HTREE_LEVEL       3
27  
28  struct ext4_xattr_ino_array {
29         unsigned int xia_count;         /* # of used item in the array */
30 @@ -2472,13 +2476,16 @@ static inline void ext4_r_blocks_count_s
31         es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
32  }
33  
34 -static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
35 +static inline loff_t ext4_isize(struct super_block *sb,
36 +                               struct ext4_inode *raw_inode)
37  {
38 -       if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
39 +       if (S_ISREG(le16_to_cpu(raw_inode->i_mode)) ||
40 +           (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) &&
41 +           S_ISDIR(le16_to_cpu(raw_inode->i_mode))))
42                 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
43                         le32_to_cpu(raw_inode->i_size_lo);
44 -       else
45 -               return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
46 +
47 +       return (loff_t)le32_to_cpu(raw_inode->i_size_lo);
48  }
49  
50  static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
51 Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
52 ===================================================================
53 --- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/namei.c
54 +++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
55 @@ -513,7 +513,14 @@ struct dx_root_info * dx_get_dx_info(str
56  
57  static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
58  {
59 -       return le32_to_cpu(entry->block) & 0x00ffffff;
60 +       return le32_to_cpu(entry->block) & 0x0fffffff;
61 +}
62 +
63 +static inline int
64 +ext4_dir_htree_level(struct super_block *sb)
65 +{
66 +       return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ?
67 +               EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
68  }
69  
70  static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
71 @@ -681,6 +688,7 @@ dx_probe(const struct qstr *d_name, stru
72         struct dx_frame *frame = frame_in;
73         u32 hash;
74  
75 +       memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
76         frame->bh = ext4_read_dirblock(dir, 0, INDEX);
77         if (IS_ERR(frame->bh))
78                 return (struct dx_frame *) frame->bh;
79 @@ -714,9 +721,13 @@ dx_probe(const struct qstr *d_name, stru
80         }
81  
82         indirect = info->indirect_levels;
83 -       if (indirect > 1) {
84 -               ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
85 -                                  info->indirect_levels);
86 +       if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
87 +               ext4_warning_inode(dir, "htree depth: %#06x exceed max depth %u",
88 +                                  indirect, ext4_dir_htree_level(dir->i_sb));
89 +               if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
90 +                       ext4_warning(dir->i_sb, "Enable large directory "
91 +                                               "feature to access it");
92 +               }
93                 goto fail;
94         }
95  
96 @@ -812,12 +826,20 @@ fail:
97  
98  static void dx_release (struct dx_frame *frames)
99  {
100 +       int i;
101 +       struct dx_root_info *info;
102 +
103         if (frames[0].bh == NULL)
104                 return;
105  
106 -       if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
107 -               brelse(frames[1].bh);
108 -       brelse(frames[0].bh);
109 +       for (i = 0, info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data);
110 +            i <= info->indirect_levels;
111 +            i++) {
112 +               if (frames[i].bh == NULL)
113 +                       break;
114 +               brelse(frames[i].bh);
115 +               frames[i].bh = NULL;
116 +       }
117  }
118  
119  /*
120 @@ -960,7 +979,7 @@ int ext4_htree_fill_tree(struct file *di
121  {
122         struct dx_hash_info hinfo;
123         struct ext4_dir_entry_2 *de;
124 -       struct dx_frame frames[2], *frame;
125 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
126         struct inode *dir;
127         ext4_lblk_t block;
128         int count = 0;
129 @@ -1376,7 +1395,7 @@ static struct buffer_head * ext4_dx_find
130         struct dx_hash_info     hinfo;
131  {
132         struct super_block * sb = dir->i_sb;
133 -       struct dx_frame frames[2], *frame;
134 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
135         const struct qstr *d_name = fname->usr_fname;
136         struct buffer_head *bh;
137         ext4_lblk_t block;
138 @@ -1832,7 +1851,7 @@ static int make_indexed_dir(handle_t *ha
139         const char      *name = dentry->d_name.name;
140         int             namelen = dentry->d_name.len;
141         struct buffer_head *bh2;
142 -       struct dx_frame frames[2], *frame;
143 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
144         struct dx_entry *entries;
145         struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
146         struct ext4_dir_entry_tail *t;
147 @@ -2117,14 +2136,17 @@ static int ext4_add_entry(handle_t *hand
148  static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
149                              struct inode *inode)
150  {
151 -       struct dx_frame frames[2], *frame;
152 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
153         struct dx_entry *entries, *at;
154         struct buffer_head *bh;
155         struct inode *dir = d_inode(dentry->d_parent);
156         struct super_block *sb = dir->i_sb;
157         struct ext4_dir_entry_2 *de;
158 +       int restart;
159         int err;
160  
161 +again:
162 +       restart = 0;
163         frame = dx_probe(fname, dir, NULL, frames);
164         if (IS_ERR(frame))
165                 return PTR_ERR(frame);
166 @@ -2138,33 +2160,48 @@ static int ext4_dx_add_entry(handle_t *h
167                 goto cleanup;
168         }
169  
170 -       BUFFER_TRACE(bh, "get_write_access");
171 -       err = ext4_journal_get_write_access(handle, bh);
172 -       if (err)
173 -               goto journal_error;
174 -
175         err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh, dentry);
176         if (err != -ENOSPC)
177                 goto cleanup;
178  
179 +       err = 0;
180         /* Block full, should compress but for now just split */
181         dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
182                        dx_get_count(entries), dx_get_limit(entries)));
183         /* Need to split index? */
184         if (dx_get_count(entries) == dx_get_limit(entries)) {
185                 ext4_lblk_t newblock;
186 -               unsigned icount = dx_get_count(entries);
187 -               int levels = frame - frames;
188 +               int levels = frame - frames + 1;
189 +               unsigned icount;
190 +               int add_level = 1;
191                 struct dx_entry *entries2;
192                 struct dx_node *node2;
193                 struct buffer_head *bh2;
194  
195 -               if (levels && (dx_get_count(frames->entries) ==
196 -                              dx_get_limit(frames->entries))) {
197 -                       ext4_warning_inode(dir, "Directory index full!");
198 +               while (frame > frames) {
199 +                       if (dx_get_count((frame - 1)->entries) <
200 +                           dx_get_limit((frame - 1)->entries)) {
201 +                               add_level = 0;
202 +                               break;
203 +                       }
204 +                       frame--; /* split higher index block */
205 +                       at = frame->at;
206 +                       entries = frame->entries;
207 +                       restart = 1;
208 +               }
209 +               if (add_level && levels == ext4_dir_htree_level(sb)) {
210 +                       ext4_warning(sb, "inode %lu: comm %s: index %u: reach max htree level %u",
211 +                                        dir->i_ino, current->comm, levels,
212 +                                        ext4_dir_htree_level(sb));
213 +                       if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
214 +                               ext4_warning(sb, "Large directory feature is"
215 +                                                "not enabled on this "
216 +                                                "filesystem");
217 +                       }
218                         err = -ENOSPC;
219                         goto cleanup;
220                 }
221 +               icount = dx_get_count(entries);
222                 bh2 = ext4_append(handle, dir, &newblock);
223                 if (IS_ERR(bh2)) {
224                         err = PTR_ERR(bh2);
225 @@ -2179,7 +2216,7 @@ static int ext4_dx_add_entry(handle_t *h
226                 err = ext4_journal_get_write_access(handle, frame->bh);
227                 if (err)
228                         goto journal_error;
229 -               if (levels) {
230 +               if (!add_level) {
231                         unsigned icount1 = icount/2, icount2 = icount - icount1;
232                         unsigned hash2 = dx_get_hash(entries + icount1);
233                         dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
234 @@ -2187,7 +2224,7 @@ static int ext4_dx_add_entry(handle_t *h
235  
236                         BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
237                         err = ext4_journal_get_write_access(handle,
238 -                                                            frames[0].bh);
239 +                                                           (frame - 1)->bh);
240                         if (err)
241                                 goto journal_error;
242  
243 @@ -2203,19 +2240,27 @@ static int ext4_dx_add_entry(handle_t *h
244                                 frame->entries = entries = entries2;
245                                 swap(frame->bh, bh2);
246                         }
247 -                       dx_insert_block(frames + 0, hash2, newblock);
248 -                       dxtrace(dx_show_index("node", frames[1].entries));
249 +                       dx_insert_block(frame - 1, hash2, newblock);
250 +                       dxtrace(dx_show_index("node", frame->entries));
251                         dxtrace(dx_show_index("node",
252 -                              ((struct dx_node *) bh2->b_data)->entries));
253 +                              ((struct dx_node *)bh2->b_data)->entries));
254                         err = ext4_handle_dirty_dx_node(handle, dir, bh2);
255                         if (err)
256                                 goto journal_error;
257                         brelse (bh2);
258 +                       err = ext4_handle_dirty_dx_node(handle, dir,
259 +                                                  (frame - 1)->bh);
260 +                       if (err)
261 +                               goto journal_error;
262 +                       if (restart) {
263 +                               err = ext4_handle_dirty_dx_node(handle, dir,
264 +                                                          frame->bh);
265 +                               goto journal_error;
266 +                       }
267                 } else {
268                         struct dx_root_info *info;
269 -                       dxtrace(printk(KERN_DEBUG
270 -                                      "Creating second level index...\n"));
271 -                       memcpy((char *) entries2, (char *) entries,
272 +
273 +                       memcpy((char *)entries2, (char *)entries,
274                                icount * sizeof(struct dx_entry));
275                         dx_set_limit(entries2, dx_node_limit(dir));
276  
277 @@ -2224,22 +2267,17 @@ static int ext4_dx_add_entry(handle_t *h
278                         dx_set_block(entries + 0, newblock);
279                         info = dx_get_dx_info((struct ext4_dir_entry_2*)
280                                               frames[0].bh->b_data);
281 -                       info->indirect_levels = 1;
282 -
283 -                       /* Add new access path frame */
284 -                       frame = frames + 1;
285 -                       frame->at = at = at - entries + entries2;
286 -                       frame->entries = entries = entries2;
287 -                       frame->bh = bh2;
288 -                       err = ext4_journal_get_write_access(handle,
289 -                                                            frame->bh);
290 +                       info->indirect_levels += 1;
291 +                       dxtrace(printk(KERN_DEBUG
292 +                                      "Creating %d level index...\n",
293 +                                      info->indirect_levels));
294 +                       err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
295                         if (err)
296                                 goto journal_error;
297 -               }
298 -               err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
299 -               if (err) {
300 -                       ext4_std_error(inode->i_sb, err);
301 -                       goto cleanup;
302 +                       err = ext4_handle_dirty_dx_node(handle, dir, bh2);
303 +                       brelse(bh2);
304 +                       restart = 1;
305 +                       goto journal_error;
306                 }
307         }
308         de = do_split(handle, dir, &bh, frame, &fname->hinfo);
309 @@ -2249,10 +2285,14 @@ static int ext4_dx_add_entry(handle_t *h
310         goto cleanup;
311  
312  journal_error:
313 -       ext4_std_error(dir->i_sb, err);
314 +       ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
315  cleanup:
316         brelse(bh);
317         dx_release(frames);
318 +       /* @restart being set means the htree path has changed, so we
319 +        * must repeat dx_probe() to find a valid htree path */
320 +       if (restart && err == 0)
321 +               goto again;
322         return err;
323  }
324  
325 Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c
326 ===================================================================
327 --- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/inode.c
328 +++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c
329 @@ -4056,12 +4056,12 @@ struct inode *ext4_iget(struct super_blo
330         if (ext4_has_feature_64bit(sb))
331                 ei->i_file_acl |=
332                         ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
333 -       inode->i_size = ext4_isize(raw_inode);
334 +       inode->i_size = ext4_isize(sb, raw_inode);
335         if ((size = i_size_read(inode)) < 0) {
336                 EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
337                 ret = -EFSCORRUPTED;
338                 goto bad_inode;
339         }
340         ei->i_disksize = inode->i_size;
341  #ifdef CONFIG_QUOTA
342         ei->i_reserved_quota = 0;
343 @@ -4306,7 +4306,7 @@ static int ext4_do_update_inode(handle_t
344                 raw_inode->i_file_acl_high =
345                         cpu_to_le16(ei->i_file_acl >> 32);
346         raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
347 -       if (ei->i_disksize != ext4_isize(raw_inode)) {
348 +       if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
349                 ext4_isize_set(raw_inode, ei->i_disksize);
350                 need_datasync = 1;
351         }