1 This INCOMPAT_LARGEDIR feature allows larger directories
2 to be created in ldiskfs, both with directory sizes over
3 2GB and a maximum htree depth of 3 instead of the
4 current limit of 2. These features are needed in order
5 to exceed the current limit of approximately 10M entries
8 Index: linux-2.6.32-504.3.3.el6.x86_64/fs/ext4/ext4.h
9 ===================================================================
10 --- linux-2.6.32-504.3.3.el6.x86_64.orig/fs/ext4/ext4.h
11 +++ linux-2.6.32-504.3.3.el6.x86_64/fs/ext4/ext4.h
12 @@ -1344,6 +1344,7 @@ EXT4_INODE_BIT_FNS(state, state_flags)
13 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
14 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400
15 #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
16 +#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000
18 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
19 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
20 @@ -1354,7 +1355,8 @@ EXT4_INODE_BIT_FNS(state, state_flags)
21 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
22 EXT4_FEATURE_INCOMPAT_EA_INODE| \
23 EXT4_FEATURE_INCOMPAT_MMP| \
24 - EXT4_FEATURE_INCOMPAT_DIRDATA)
25 + EXT4_FEATURE_INCOMPAT_DIRDATA| \
26 + EXT4_FEATURE_INCOMPAT_LARGEDIR)
28 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
29 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
30 @@ -1612,6 +1614,17 @@ ext4_group_first_block_no(struct super_b
32 #define ERR_BAD_DX_DIR -75000
34 +/* htree levels for ext4 */
35 +#define EXT4_HTREE_LEVEL_COMPAT 2
36 +#define EXT4_HTREE_LEVEL 3
39 +ext4_dir_htree_level(struct super_block *sb)
41 + return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ?
42 + EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
45 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
46 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
48 @@ -2005,13 +2018,15 @@ static inline void ext4_r_blocks_count_s
49 es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
52 -static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
53 +static inline loff_t ext4_isize(struct super_block *sb,
54 + struct ext4_inode *raw_inode)
56 - if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
57 + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ||
58 + S_ISREG(le16_to_cpu(raw_inode->i_mode)))
59 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
60 le32_to_cpu(raw_inode->i_size_lo);
62 - return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
64 + return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
67 static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
68 Index: linux-2.6.32-504.3.3.el6.x86_64/fs/ext4/inode.c
69 ===================================================================
70 --- linux-2.6.32-504.3.3.el6.x86_64.orig/fs/ext4/inode.c
71 +++ linux-2.6.32-504.3.3.el6.x86_64/fs/ext4/inode.c
72 @@ -5470,7 +5470,7 @@ struct inode *ext4_iget(struct super_blo
73 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
75 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
76 - inode->i_size = ext4_isize(raw_inode);
77 + inode->i_size = ext4_isize(sb, raw_inode);
78 ei->i_disksize = inode->i_size;
80 ei->i_reserved_quota = 0;
81 @@ -5654,7 +5654,7 @@ static int ext4_do_update_inode(handle_t
82 raw_inode->i_file_acl_high =
83 cpu_to_le16(ei->i_file_acl >> 32);
84 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
85 - if (ei->i_disksize != ext4_isize(raw_inode)) {
86 + if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
87 ext4_isize_set(raw_inode, ei->i_disksize);
90 Index: linux-2.6.32-504.3.3.el6.x86_64/fs/ext4/namei.c
91 ===================================================================
92 --- linux-2.6.32-504.3.3.el6.x86_64.orig/fs/ext4/namei.c
93 +++ linux-2.6.32-504.3.3.el6.x86_64/fs/ext4/namei.c
94 @@ -225,7 +225,7 @@ struct dx_root_info * dx_get_dx_info(str
96 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
98 - return le32_to_cpu(entry->block) & 0x00ffffff;
99 + return le32_to_cpu(entry->block) & 0x0fffffff;
102 static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
103 @@ -388,7 +388,7 @@ dx_probe(const struct qstr *d_name, stru
104 struct dx_frame *frame = frame_in;
108 + memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
109 if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
112 @@ -418,9 +418,16 @@ dx_probe(const struct qstr *d_name, stru
116 - if ((indirect = info->indirect_levels) > 1) {
117 - ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
118 - info->indirect_levels);
119 + indirect = info->indirect_levels;
120 + if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
121 + ext4_warning(dir->i_sb,
122 + "Directory (ino: %lu) htree depth %#06x exceed "
123 + "supported value", dir->i_ino,
124 + ext4_dir_htree_level(dir->i_sb));
125 + if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
126 + ext4_warning(dir->i_sb, "Enable large directory "
127 + "feature to access it");
130 *err = ERR_BAD_DX_DIR;
132 @@ -512,13 +519,18 @@ fail:
133 static void dx_release (struct dx_frame *frames)
135 struct dx_root_info *info;
138 if (frames[0].bh == NULL)
141 info = dx_get_dx_info((struct ext4_dir_entry_2*)frames[0].bh->b_data);
142 - if (info->indirect_levels)
143 - brelse(frames[1].bh);
144 - brelse(frames[0].bh);
145 + for (i = 0; i <= info->indirect_levels; i++) {
146 + if (frames[i].bh == NULL)
148 + brelse(frames[i].bh);
149 + frames[i].bh = NULL;
154 @@ -661,7 +673,7 @@ int ext4_htree_fill_tree(struct file *di
156 struct dx_hash_info hinfo;
157 struct ext4_dir_entry_2 *de;
158 - struct dx_frame frames[2], *frame;
159 + struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
163 @@ -1003,7 +1015,7 @@ static struct buffer_head * ext4_dx_find
164 struct super_block * sb;
165 struct dx_hash_info hinfo;
167 - struct dx_frame frames[2], *frame;
168 + struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
169 struct buffer_head *bh;
171 struct ext4_dir_entry_2 *de, *top;
172 @@ -1443,7 +1455,7 @@ static int add_dirent_to_buf(handle_t *h
174 dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
175 ext4_update_dx_flag(dir);
177 + inode_inc_iversion(dir);
178 ext4_mark_inode_dirty(handle, dir);
179 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
180 err = ext4_handle_dirty_metadata(handle, dir, bh);
181 @@ -1463,7 +1475,7 @@ static int make_indexed_dir(handle_t *ha
182 const char *name = dentry->d_name.name;
183 int namelen = dentry->d_name.len;
184 struct buffer_head *bh2;
185 - struct dx_frame frames[2], *frame;
186 + struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
187 struct dx_entry *entries;
188 struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
190 @@ -1712,15 +1724,18 @@ static int ext4_add_entry(handle_t *hand
191 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
194 - struct dx_frame frames[2], *frame;
195 + struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
196 struct dx_entry *entries, *at;
197 struct dx_hash_info hinfo;
198 struct buffer_head *bh;
199 struct inode *dir = dentry->d_parent->d_inode;
200 struct super_block *sb = dir->i_sb;
201 struct ext4_dir_entry_2 *de;
207 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
210 @@ -1730,33 +1745,48 @@ static int ext4_dx_add_entry(handle_t *h
211 if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
214 - BUFFER_TRACE(bh, "get_write_access");
215 - err = ext4_journal_get_write_access(handle, bh);
217 - goto journal_error;
219 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
224 /* Block full, should compress but for now just split */
225 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
226 dx_get_count(entries), dx_get_limit(entries)));
227 /* Need to split index? */
228 if (dx_get_count(entries) == dx_get_limit(entries)) {
229 ext4_lblk_t newblock;
230 - unsigned icount = dx_get_count(entries);
231 - int levels = frame - frames;
232 + int levels = frame - frames + 1;
235 struct dx_entry *entries2;
236 struct dx_node *node2;
237 struct buffer_head *bh2;
239 - if (levels && (dx_get_count(frames->entries) ==
240 - dx_get_limit(frames->entries))) {
241 - ext4_warning(sb, "Directory index full!");
242 + while (frame > frames) {
243 + if (dx_get_count((frame - 1)->entries) <
244 + dx_get_limit((frame - 1)->entries)) {
248 + frame--; /* split higher index block */
250 + entries = frame->entries;
253 + if (add_level && levels == ext4_dir_htree_level(sb)) {
254 + ext4_warning(sb, "Directory (ino: %lu) index full, "
255 + "reach max htree level :%d",
256 + dir->i_ino, levels);
257 + if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
258 + ext4_warning(sb, "Large directory feature is"
259 + "not enabled on this "
265 + icount = dx_get_count(entries);
266 bh2 = ext4_append (handle, dir, &newblock, &err);
269 @@ -1769,7 +1799,7 @@ static int ext4_dx_add_entry(handle_t *h
270 err = ext4_journal_get_write_access(handle, frame->bh);
275 unsigned icount1 = icount/2, icount2 = icount - icount1;
276 unsigned hash2 = dx_get_hash(entries + icount1);
277 dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
278 @@ -1777,7 +1807,7 @@ static int ext4_dx_add_entry(handle_t *h
280 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
281 err = ext4_journal_get_write_access(handle,
287 @@ -1793,18 +1823,24 @@ static int ext4_dx_add_entry(handle_t *h
288 frame->entries = entries = entries2;
289 swap(frame->bh, bh2);
291 - dx_insert_block(frames + 0, hash2, newblock);
292 - dxtrace(dx_show_index("node", frames[1].entries));
293 + dx_insert_block((frame - 1), hash2, newblock);
294 + dxtrace(dx_show_index("node", frame->entries));
295 dxtrace(dx_show_index("node",
296 ((struct dx_node *) bh2->b_data)->entries));
297 err = ext4_handle_dirty_metadata(handle, dir, bh2);
301 + ext4_handle_dirty_metadata(handle, dir,
304 + ext4_handle_dirty_metadata(handle, dir,
309 struct dx_root_info * info;
310 - dxtrace(printk(KERN_DEBUG
311 - "Creating second level index...\n"));
313 memcpy((char *) entries2, (char *) entries,
314 icount * sizeof(struct dx_entry));
315 dx_set_limit(entries2, dx_node_limit(dir));
316 @@ -1814,19 +1850,16 @@ static int ext4_dx_add_entry(handle_t *h
317 dx_set_block(entries + 0, newblock);
318 info = dx_get_dx_info((struct ext4_dir_entry_2*)
319 frames[0].bh->b_data);
320 - info->indirect_levels = 1;
322 - /* Add new access path frame */
323 - frame = frames + 1;
324 - frame->at = at = at - entries + entries2;
325 - frame->entries = entries = entries2;
327 - err = ext4_journal_get_write_access(handle,
330 - goto journal_error;
331 + info->indirect_levels += 1;
332 + dxtrace(printk(KERN_DEBUG
333 + "Creating %d level index...\n",
334 + info->indirect_levels));
335 + ext4_handle_dirty_metadata(handle, dir, frame->bh);
336 + ext4_handle_dirty_metadata(handle, dir, bh2);
341 - err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
343 ext4_std_error(inode->i_sb, err);
345 @@ -1840,6 +1873,10 @@ cleanup:
349 + /* If @restart is true, the htree path has been changed, so we need to
350 + * repeat dx_probe() to find a valid htree path */
351 + if (restart && err == 0)
356 @@ -1874,7 +1911,7 @@ int ext4_delete_entry(handle_t *handle,
361 + inode_inc_iversion(dir);
362 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
363 ext4_handle_dirty_metadata(handle, dir, bh);