Whamcloud - gitweb
LU-12477 ldiskfs: remove obsolete ext4 patches
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel7.6 / ext4-large-dir.patch
1 This INCOMPAT_LARGEDIR feature allows larger directories
2 to be created in ldiskfs, both with directory sizes over
3 2GB and a maximum htree depth of 3 instead of the
4 current limit of 2. These features are needed in order
5 to exceed the current limit of approximately 10M entries
6 in a single directory.
7
8 Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
9 ===================================================================
10 --- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h
11 +++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
12 @@ -1585,7 +1585,8 @@ static inline void ext4_clear_state_flag
13                                          EXT4_FEATURE_INCOMPAT_EA_INODE| \
14                                          EXT4_FEATURE_INCOMPAT_MMP |    \
15                                          EXT4_FEATURE_INCOMPAT_DIRDATA| \
16 -                                        EXT4_FEATURE_INCOMPAT_INLINE_DATA)
17 +                                        EXT4_FEATURE_INCOMPAT_INLINE_DATA| \
18 +                                        EXT4_FEATURE_INCOMPAT_LARGEDIR)
19  #define EXT4_FEATURE_RO_COMPAT_SUPP    (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
20                                          EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
21                                          EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
22 @@ -1999,6 +2000,9 @@ struct mmpd_data {
23  # define NORET_TYPE    /**/
24  # define ATTRIB_NORET  __attribute__((noreturn))
25  # define NORET_AND     noreturn,
26 +/* htree levels for ext4 */
27 +#define EXT4_HTREE_LEVEL_COMPAT 2
28 +#define EXT4_HTREE_LEVEL       3
29  
30  struct ext4_xattr_ino_array {
31         unsigned int xia_count;         /* # of used item in the array */
32 @@ -2472,13 +2476,16 @@ static inline void ext4_r_blocks_count_s
33         es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
34  }
35  
36 -static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
37 +static inline loff_t ext4_isize(struct super_block *sb,
38 +                               struct ext4_inode *raw_inode)
39  {
40 -       if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
41 +       if (S_ISREG(le16_to_cpu(raw_inode->i_mode)) ||
42 +           (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) &&
43 +           S_ISDIR(le16_to_cpu(raw_inode->i_mode))))
44                 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
45                         le32_to_cpu(raw_inode->i_size_lo);
46 -       else
47 -               return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
48 +
49 +       return (loff_t)le32_to_cpu(raw_inode->i_size_lo);
50  }
51  
52  static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
53 Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
54 ===================================================================
55 --- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/namei.c
56 +++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
57 @@ -513,7 +513,14 @@ struct dx_root_info * dx_get_dx_info(str
58  
59  static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
60  {
61 -       return le32_to_cpu(entry->block) & 0x00ffffff;
62 +       return le32_to_cpu(entry->block) & 0x0fffffff;
63 +}
64 +
65 +static inline int
66 +ext4_dir_htree_level(struct super_block *sb)
67 +{
68 +       return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ?
69 +               EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
70  }
71  
72  static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
73 @@ -681,7 +688,7 @@ dx_probe(const struct qstr *d_name, stru
74         struct dx_frame *frame = frame_in;
75         u32 hash;
76  
77 -       frame->bh = NULL;
78 +       memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
79         bh = ext4_read_dirblock(dir, 0, INDEX);
80         if (IS_ERR(bh)) {
81                 *err = PTR_ERR(bh);
82 @@ -714,10 +721,15 @@ dx_probe(const struct qstr *d_name, stru
83         }
84  
85         indirect = info->indirect_levels;
86 -       if (indirect > 1) {
87 -               ext4_warning(dir->i_sb,
88 -                            "inode #%lu: unimplemented hash depth %u",
89 -                            dir->i_ino, info->indirect_levels);
90 +       if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
91 +               ext4_warning(dir->i_sb,
92 +                            "inode #%lu: comm %s: htree depth %#06x exceed max depth %u",
93 +                            dir->i_ino, current->comm, indirect,
94 +                            ext4_dir_htree_level(dir->i_sb));
95 +               if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
96 +                       ext4_warning(dir->i_sb, "Enable large directory "
97 +                                               "feature to access it");
98 +               }
99                 brelse(bh);
100                 *err = ERR_BAD_DX_DIR;
101                 goto fail;
102 @@ -812,13 +826,18 @@ fail:
103  static void dx_release (struct dx_frame *frames)
104  {
105         struct dx_root_info *info;
106 +       int i;
107 +
108         if (frames[0].bh == NULL)
109                 return;
110  
111         info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data);
112 -       if (info->indirect_levels)
113 -               brelse(frames[1].bh);
114 -       brelse(frames[0].bh);
115 +       for (i = 0; i <= info->indirect_levels; i++) {
116 +               if (frames[i].bh == NULL)
117 +                       break;
118 +               brelse(frames[i].bh);
119 +               frames[i].bh = NULL;
120 +       }
121  }
122  
123  /*
124 @@ -960,7 +979,7 @@ int ext4_htree_fill_tree(struct file *di
125  {
126         struct dx_hash_info hinfo;
127         struct ext4_dir_entry_2 *de;
128 -       struct dx_frame frames[2], *frame;
129 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
130         struct inode *dir;
131         ext4_lblk_t block;
132         int count = 0;
133 @@ -1376,7 +1395,7 @@ static struct buffer_head * ext4_dx_find
134  {
135         struct super_block * sb = dir->i_sb;
136         struct dx_hash_info     hinfo;
137 -       struct dx_frame frames[2], *frame;
138 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
139         struct buffer_head *bh;
140         ext4_lblk_t block;
141         int retval;
142 @@ -1832,7 +1851,7 @@ static int make_indexed_dir(handle_t *ha
143         const char      *name = dentry->d_name.name;
144         int             namelen = dentry->d_name.len;
145         struct buffer_head *bh2;
146 -       struct dx_frame frames[2], *frame;
147 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
148         struct dx_entry *entries;
149         struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
150         struct ext4_dir_entry_tail *t;
151 @@ -2117,15 +2136,18 @@ static int ext4_add_entry(handle_t *hand
152  static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
153                              struct inode *inode)
154  {
155 -       struct dx_frame frames[2], *frame;
156 +       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
157         struct dx_entry *entries, *at;
158         struct dx_hash_info hinfo;
159         struct buffer_head *bh;
160         struct inode *dir = dentry->d_parent->d_inode;
161         struct super_block *sb = dir->i_sb;
162         struct ext4_dir_entry_2 *de;
163 +       int restart;
164         int err;
165  
166 +again:
167 +       restart = 0;
168         frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
169         if (!frame)
170                 return err;
171 @@ -2138,33 +2160,48 @@ static int ext4_dx_add_entry(handle_t *h
172                 goto cleanup;
173         }
174  
175 -       BUFFER_TRACE(bh, "get_write_access");
176 -       err = ext4_journal_get_write_access(handle, bh);
177 -       if (err)
178 -               goto journal_error;
179 -
180         err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
181         if (err != -ENOSPC)
182                 goto cleanup;
183  
184 +       err = 0;
185         /* Block full, should compress but for now just split */
186         dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
187                        dx_get_count(entries), dx_get_limit(entries)));
188         /* Need to split index? */
189         if (dx_get_count(entries) == dx_get_limit(entries)) {
190                 ext4_lblk_t newblock;
191 -               unsigned icount = dx_get_count(entries);
192 -               int levels = frame - frames;
193 +               int levels = frame - frames + 1;
194 +               unsigned icount;
195 +               int add_level = 1;
196                 struct dx_entry *entries2;
197                 struct dx_node *node2;
198                 struct buffer_head *bh2;
199  
200 -               if (levels && (dx_get_count(frames->entries) ==
201 -                              dx_get_limit(frames->entries))) {
202 -                       ext4_warning(sb, "Directory index full!");
203 +               while (frame > frames) {
204 +                       if (dx_get_count((frame - 1)->entries) <
205 +                           dx_get_limit((frame - 1)->entries)) {
206 +                               add_level = 0;
207 +                               break;
208 +                       }
209 +                       frame--; /* split higher index block */
210 +                       at = frame->at;
211 +                       entries = frame->entries;
212 +                       restart = 1;
213 +               }
214 +               if (add_level && levels == ext4_dir_htree_level(sb)) {
215 +                       ext4_warning(sb, "inode %lu: comm %s: index %u: reach max htree level %u",
216 +                                        dir->i_ino, current->comm, levels,
217 +                                        ext4_dir_htree_level(sb));
218 +                       if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
219 +                               ext4_warning(sb, "Large directory feature is "
220 +                                                "not enabled on this "
221 +                                                "filesystem");
222 +                       }
223                         err = -ENOSPC;
224                         goto cleanup;
225                 }
226 +               icount = dx_get_count(entries);
227                 bh2 = ext4_append(handle, dir, &newblock);
228                 if (IS_ERR(bh2)) {
229                         err = PTR_ERR(bh2);
230 @@ -2179,7 +2216,7 @@ static int ext4_dx_add_entry(handle_t *h
231                 err = ext4_journal_get_write_access(handle, frame->bh);
232                 if (err)
233                         goto journal_error;
234 -               if (levels) {
235 +               if (!add_level) {
236                         unsigned icount1 = icount/2, icount2 = icount - icount1;
237                         unsigned hash2 = dx_get_hash(entries + icount1);
238                         dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
239 @@ -2187,7 +2224,7 @@ static int ext4_dx_add_entry(handle_t *h
240  
241                         BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
242                         err = ext4_journal_get_write_access(handle,
243 -                                                            frames[0].bh);
244 +                                                           (frame - 1)->bh);
245                         if (err)
246                                 goto journal_error;
247  
248 @@ -2203,19 +2240,27 @@ static int ext4_dx_add_entry(handle_t *h
249                                 frame->entries = entries = entries2;
250                                 swap(frame->bh, bh2);
251                         }
252 -                       dx_insert_block(frames + 0, hash2, newblock);
253 -                       dxtrace(dx_show_index("node", frames[1].entries));
254 +                       dx_insert_block(frame - 1, hash2, newblock);
255 +                       dxtrace(dx_show_index("node", frame->entries));
256                         dxtrace(dx_show_index("node",
257 -                              ((struct dx_node *) bh2->b_data)->entries));
258 +                              ((struct dx_node *)bh2->b_data)->entries));
259                         err = ext4_handle_dirty_dx_node(handle, dir, bh2);
260                         if (err)
261                                 goto journal_error;
262                         brelse (bh2);
263 +                       err = ext4_handle_dirty_dx_node(handle, dir,
264 +                                                  (frame - 1)->bh);
265 +                       if (err)
266 +                               goto journal_error;
267 +                       if (restart) {
268 +                               err = ext4_handle_dirty_dx_node(handle, dir,
269 +                                                          frame->bh);
270 +                               goto journal_error;
271 +                       }
272                 } else {
273                         struct dx_root_info *info;
274 -                       dxtrace(printk(KERN_DEBUG
275 -                                      "Creating second level index...\n"));
276 -                       memcpy((char *) entries2, (char *) entries,
277 +
278 +                       memcpy((char *)entries2, (char *)entries,
279                                icount * sizeof(struct dx_entry));
280                         dx_set_limit(entries2, dx_node_limit(dir));
281  
282 @@ -2224,22 +2267,17 @@ static int ext4_dx_add_entry(handle_t *h
283                         dx_set_block(entries + 0, newblock);
284                         info = dx_get_dx_info((struct ext4_dir_entry_2*)
285                                               frames[0].bh->b_data);
286 -                       info->indirect_levels = 1;
287 -
288 -                       /* Add new access path frame */
289 -                       frame = frames + 1;
290 -                       frame->at = at = at - entries + entries2;
291 -                       frame->entries = entries = entries2;
292 -                       frame->bh = bh2;
293 -                       err = ext4_journal_get_write_access(handle,
294 -                                                            frame->bh);
295 +                       info->indirect_levels += 1;
296 +                       dxtrace(printk(KERN_DEBUG
297 +                                      "Creating %d level index...\n",
298 +                                      info->indirect_levels));
299 +                       err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
300                         if (err)
301                                 goto journal_error;
302 -               }
303 -               err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
304 -               if (err) {
305 -                       ext4_std_error(inode->i_sb, err);
306 -                       goto cleanup;
307 +                       err = ext4_handle_dirty_dx_node(handle, dir, bh2);
308 +                       brelse(bh2);
309 +                       restart = 1;
310 +                       goto journal_error;
311                 }
312         }
313         de = do_split(handle, dir, &bh, frame, &hinfo, &err);
314 @@ -2249,10 +2285,14 @@ static int ext4_dx_add_entry(handle_t *h
315         goto cleanup;
316  
317  journal_error:
318 -       ext4_std_error(dir->i_sb, err);
319 +       ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
320  cleanup:
321         brelse(bh);
322         dx_release(frames);
323 +       /* @restart being set means the htree path has changed, so we need to
324 +        * repeat dx_probe() to find the valid htree path */
325 +       if (restart && err == 0)
326 +               goto again;
327         return err;
328  }
329  
330 Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c
331 ===================================================================
332 --- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/inode.c
333 +++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c
334 @@ -4056,12 +4056,12 @@ struct inode *ext4_iget(struct super_blo
335         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
336                 ei->i_file_acl |=
337                         ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
338 -       inode->i_size = ext4_isize(raw_inode);
339 +       inode->i_size = ext4_isize(sb, raw_inode);
340         if ((size = i_size_read(inode)) < 0) {
341                 EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
342                 ret = -EFSCORRUPTED;
343                 goto bad_inode;
344         }
345         ei->i_disksize = inode->i_size;
346  #ifdef CONFIG_QUOTA
347         ei->i_reserved_quota = 0;
348 @@ -4306,7 +4306,7 @@ static int ext4_do_update_inode(handle_t
349                 raw_inode->i_file_acl_high =
350                         cpu_to_le16(ei->i_file_acl >> 32);
351         raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
352 -       if (ei->i_disksize != ext4_isize(raw_inode)) {
353 +       if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
354                 ext4_isize_set(raw_inode, ei->i_disksize);
355                 need_datasync = 1;
356         }