--- /dev/null
+This INCOMPAT_LARGEDIR feature allows larger directories
+to be created in ldiskfs, both with directory sizes over
+2GB and a maximum htree depth of 3 instead of the
+current limit of 2. These features are needed in order
+to exceed the current limit of approximately 10M entries
+in a single directory.
+
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h
++++ linux-stage/fs/ext4/ext4.h
+@@ -1391,6 +1391,7 @@ static inline void ext4_clear_state_flag
+ #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
+ #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
+ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
++#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000
+
+ #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
+ #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
+@@ -1416,7 +1417,8 @@ static inline void ext4_clear_state_flag
+ EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+ EXT4_FEATURE_INCOMPAT_EA_INODE| \
+ EXT4_FEATURE_INCOMPAT_MMP| \
+- EXT4_FEATURE_INCOMPAT_DIRDATA)
++ EXT4_FEATURE_INCOMPAT_DIRDATA| \
++ EXT4_FEATURE_INCOMPAT_LARGEDIR)
+
+ #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+@@ -1679,6 +1681,17 @@ ext4_group_first_block_no(struct super_b
+ */
+ #define ERR_BAD_DX_DIR -75000
+
++/* htree levels for ext4 */
++#define EXT4_HTREE_LEVEL_COMPAT 2
++#define EXT4_HTREE_LEVEL 3
++
++static inline int
++ext4_dir_htree_level(struct super_block *sb)
++{
++ return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ?
++ EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
++}
++
+ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
+ ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
+
+@@ -2077,13 +2090,15 @@ static inline void ext4_r_blocks_count_s
+ es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
+ }
+
+-static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
++static inline loff_t ext4_isize(struct super_block *sb,
++ struct ext4_inode *raw_inode)
+ {
+- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
++ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ||
++ S_ISREG(le16_to_cpu(raw_inode->i_mode)))
+ return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
+ le32_to_cpu(raw_inode->i_size_lo);
+- else
+- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
++
++ return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
+ }
+
+ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
+Index: linux-stage/fs/ext4/inode.c
+===================================================================
+--- linux-stage.orig/fs/ext4/inode.c
++++ linux-stage/fs/ext4/inode.c
+@@ -5007,7 +5007,7 @@ struct inode *ext4_iget(struct super_blo
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
+ ei->i_file_acl |=
+ ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
+- inode->i_size = ext4_isize(raw_inode);
++ inode->i_size = ext4_isize(sb, raw_inode);
+ if ((size = i_size_read(inode)) < 0) {
+ EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+ ret = -EIO;
+@@ -5253,7 +5253,7 @@ static int ext4_do_update_inode(handle_t
+ raw_inode->i_file_acl_high =
+ cpu_to_le16(ei->i_file_acl >> 32);
+ raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
+- if (ei->i_disksize != ext4_isize(raw_inode)) {
++ if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
+ ext4_isize_set(raw_inode, ei->i_disksize);
+ need_datasync = 1;
+ }
+Index: linux-stage/fs/ext4/namei.c
+===================================================================
+--- linux-stage.orig/fs/ext4/namei.c
++++ linux-stage/fs/ext4/namei.c
+@@ -209,7 +209,7 @@ struct dx_root_info * dx_get_dx_info(str
+
+ static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
+ {
+- return le32_to_cpu(entry->block) & 0x00ffffff;
++ return le32_to_cpu(entry->block) & 0x0fffffff;
+ }
+
+ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
+@@ -372,7 +372,7 @@ dx_probe(const struct qstr *d_name, stru
+ struct dx_frame *frame = frame_in;
+ u32 hash;
+
+- frame->bh = NULL;
++ memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
+ if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
+ goto fail;
+
+@@ -402,9 +402,16 @@ dx_probe(const struct qstr *d_name, stru
+ goto fail;
+ }
+
+- if ((indirect = info->indirect_levels) > 1) {
+- ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
+- info->indirect_levels);
++ indirect = info->indirect_levels;
++ if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
++ ext4_warning(dir->i_sb,
++ "Directory (ino: %lu) htree depth %#06x exceed "
++ "supported value", dir->i_ino,
++ ext4_dir_htree_level(dir->i_sb));
++ if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
++ ext4_warning(dir->i_sb, "Enable large directory "
++ "feature to access it");
++ }
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+@@ -496,13 +503,18 @@ fail:
+ static void dx_release (struct dx_frame *frames)
+ {
+ struct dx_root_info *info;
++ int i;
++
+ if (frames[0].bh == NULL)
+ return;
+
+ info = dx_get_dx_info((struct ext4_dir_entry_2*)frames[0].bh->b_data);
+- if (info->indirect_levels)
+- brelse(frames[1].bh);
+- brelse(frames[0].bh);
++ for (i = 0; i <= info->indirect_levels; i++) {
++ if (frames[i].bh == NULL)
++ break;
++ brelse(frames[i].bh);
++ frames[i].bh = NULL;
++ }
+ }
+
+ /*
+@@ -642,7 +654,7 @@ int ext4_htree_fill_tree(struct file *di
+ {
+ struct dx_hash_info hinfo;
+ struct ext4_dir_entry_2 *de;
+- struct dx_frame frames[2], *frame;
++ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
+ struct inode *dir;
+ ext4_lblk_t block;
+ int count = 0;
+@@ -983,7 +995,7 @@ static struct buffer_head * ext4_dx_find
+ struct super_block * sb;
+ struct dx_hash_info hinfo;
+ u32 hash;
+- struct dx_frame frames[2], *frame;
++ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
+ struct buffer_head *bh;
+ ext4_lblk_t block;
+ int retval;
+@@ -1423,7 +1435,7 @@ static int add_dirent_to_buf(handle_t *h
+ */
+ dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
+ ext4_update_dx_flag(dir);
+- dir->i_version++;
++ inode_inc_iversion(dir);
+ ext4_mark_inode_dirty(handle, dir);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, dir, bh);
+@@ -1443,7 +1455,7 @@ static int make_indexed_dir(handle_t *ha
+ const char *name = dentry->d_name.name;
+ int namelen = dentry->d_name.len;
+ struct buffer_head *bh2;
+- struct dx_frame frames[2], *frame;
++ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
+ struct dx_entry *entries;
+ struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
+ char *data1, *top;
+@@ -1692,15 +1704,18 @@ static int ext4_add_entry(handle_t *hand
+ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode)
+ {
+- struct dx_frame frames[2], *frame;
++ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
+ struct dx_entry *entries, *at;
+ struct dx_hash_info hinfo;
+ struct buffer_head *bh;
+ struct inode *dir = dentry->d_parent->d_inode;
+ struct super_block *sb = dir->i_sb;
+ struct ext4_dir_entry_2 *de;
++ int restart;
+ int err;
+
++again:
++ restart = 0;
+ frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
+ if (!frame)
+ return err;
+@@ -1710,33 +1725,48 @@ static int ext4_dx_add_entry(handle_t *h
+ if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+ goto cleanup;
+
+- BUFFER_TRACE(bh, "get_write_access");
+- err = ext4_journal_get_write_access(handle, bh);
+- if (err)
+- goto journal_error;
+-
+ err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+ if (err != -ENOSPC)
+ goto cleanup;
+
++ err = 0;
+ /* Block full, should compress but for now just split */
+ dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
+ dx_get_count(entries), dx_get_limit(entries)));
+ /* Need to split index? */
+ if (dx_get_count(entries) == dx_get_limit(entries)) {
+ ext4_lblk_t newblock;
+- unsigned icount = dx_get_count(entries);
+- int levels = frame - frames;
++ int levels = frame - frames + 1;
++ unsigned icount;
++ int add_level = 1;
+ struct dx_entry *entries2;
+ struct dx_node *node2;
+ struct buffer_head *bh2;
+
+- if (levels && (dx_get_count(frames->entries) ==
+- dx_get_limit(frames->entries))) {
+- ext4_warning(sb, "Directory index full!");
++ while (frame > frames) {
++ if (dx_get_count((frame - 1)->entries) <
++ dx_get_limit((frame - 1)->entries)) {
++ add_level = 0;
++ break;
++ }
++ frame--; /* split higher index block */
++ at = frame->at;
++ entries = frame->entries;
++ restart = 1;
++ }
++ if (add_level && levels == ext4_dir_htree_level(sb)) {
++ ext4_warning(sb, "Directory (ino: %lu) index full, "
++ "reach max htree level :%d",
++ dir->i_ino, levels);
++ if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
++ ext4_warning(sb, "Large directory feature is "
++ "not enabled on this "
++ "filesystem");
++ }
+ err = -ENOSPC;
+ goto cleanup;
+ }
++ icount = dx_get_count(entries);
+ bh2 = ext4_append (handle, dir, &newblock, &err);
+ if (!(bh2))
+ goto cleanup;
+@@ -1749,7 +1779,7 @@ static int ext4_dx_add_entry(handle_t *h
+ err = ext4_journal_get_write_access(handle, frame->bh);
+ if (err)
+ goto journal_error;
+- if (levels) {
++ if (!add_level) {
+ unsigned icount1 = icount/2, icount2 = icount - icount1;
+ unsigned hash2 = dx_get_hash(entries + icount1);
+ dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
+@@ -1757,7 +1787,7 @@ static int ext4_dx_add_entry(handle_t *h
+
+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
+ err = ext4_journal_get_write_access(handle,
+- frames[0].bh);
++ (frame - 1)->bh);
+ if (err)
+ goto journal_error;
+
+@@ -1773,18 +1803,24 @@ static int ext4_dx_add_entry(handle_t *h
+ frame->entries = entries = entries2;
+ swap(frame->bh, bh2);
+ }
+- dx_insert_block(frames + 0, hash2, newblock);
+- dxtrace(dx_show_index("node", frames[1].entries));
++ dx_insert_block((frame - 1), hash2, newblock);
++ dxtrace(dx_show_index("node", frame->entries));
+ dxtrace(dx_show_index("node",
+ ((struct dx_node *) bh2->b_data)->entries));
+ err = ext4_handle_dirty_metadata(handle, dir, bh2);
+ if (err)
+ goto journal_error;
+ brelse (bh2);
++ ext4_handle_dirty_metadata(handle, dir,
++ (frame - 1)->bh);
++ if (restart) {
++ ext4_handle_dirty_metadata(handle, dir,
++ frame->bh);
++ goto cleanup;
++ }
+ } else {
+ struct dx_root_info * info;
+- dxtrace(printk(KERN_DEBUG
+- "Creating second level index...\n"));
++
+ memcpy((char *) entries2, (char *) entries,
+ icount * sizeof(struct dx_entry));
+ dx_set_limit(entries2, dx_node_limit(dir));
+@@ -1794,19 +1830,16 @@ static int ext4_dx_add_entry(handle_t *h
+ dx_set_block(entries + 0, newblock);
+ info = dx_get_dx_info((struct ext4_dir_entry_2*)
+ frames[0].bh->b_data);
+- info->indirect_levels = 1;
+-
+- /* Add new access path frame */
+- frame = frames + 1;
+- frame->at = at = at - entries + entries2;
+- frame->entries = entries = entries2;
+- frame->bh = bh2;
+- err = ext4_journal_get_write_access(handle,
+- frame->bh);
+- if (err)
+- goto journal_error;
++ info->indirect_levels += 1;
++ dxtrace(printk(KERN_DEBUG
++ "Creating %d level index...\n",
++ info->indirect_levels));
++ ext4_handle_dirty_metadata(handle, dir, frame->bh);
++ ext4_handle_dirty_metadata(handle, dir, bh2);
++ brelse(bh2);
++ restart = 1;
++ goto cleanup;
+ }
+- err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
+ if (err) {
+ ext4_std_error(inode->i_sb, err);
+ goto cleanup;
+@@ -1824,6 +1857,10 @@ cleanup:
+ if (bh)
+ brelse(bh);
+ dx_release(frames);
++ /* If @restart is set, the htree path has changed, so dx_probe()
++ * must be repeated to find a valid htree path. */
++ if (restart && err == 0)
++ goto again;
+ return err;
+ }
+
+@@ -1862,7 +1899,7 @@ int ext4_delete_entry(handle_t *handle,
+ blocksize);
+ else
+ de->inode = 0;
+- dir->i_version++;
++ inode_inc_iversion(dir);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, dir, bh);
+ if (unlikely(err)) {