--- /dev/null
+--- ./fs/ext3/dir.c 2002/03/05 06:18:59 2.1
++++ ./fs/ext3/dir.c 2002/03/05 06:26:56
+@@ -26,7 +26,7 @@
+ DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+ };
+
+-static int ext3_readdir(struct file *, void *, filldir_t);
++int ext3_readdir(struct file *, void *, filldir_t);
+
+ struct file_operations ext3_dir_operations = {
+ read: generic_read_dir,
+@@ -65,7 +65,7 @@
+ return error_msg == NULL ? 1 : 0;
+ }
+
+-static int ext3_readdir(struct file * filp,
++int ext3_readdir(struct file * filp,
+ void * dirent, filldir_t filldir)
+ {
+ int error = 0;
+--- ./fs/ext3/super.c 2002/03/05 06:18:59 2.1
++++ ./fs/ext3/super.c 2002/03/05 06:26:56
+@@ -529,6 +529,12 @@
+ "EXT3 Check option not supported\n");
+ #endif
+ }
++ else if (!strcmp (this_char, "index"))
++#ifdef CONFIG_EXT3_INDEX
++ set_opt (*mount_options, INDEX);
++#else
++ printk("EXT3 Index option not supported\n");
++#endif
+ else if (!strcmp (this_char, "debug"))
+ set_opt (*mount_options, DEBUG);
+ else if (!strcmp (this_char, "errors")) {
+@@ -712,6 +718,10 @@
+ EXT3_BLOCKS_PER_GROUP(sb),
+ EXT3_INODES_PER_GROUP(sb),
+ sbi->s_mount_opt);
++
++ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
++ set_opt (EXT3_SB(sb)->s_mount_opt, INDEX);
++
+ printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ",
+ bdevname(sb->s_dev));
+ if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
+--- ./fs/ext3/namei.c 2002/03/05 06:18:59 2.1
++++ ./fs/ext3/namei.c 2002/03/06 00:13:18
+@@ -16,6 +16,10 @@
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ * Directory entry file type support and forward compatibility hooks
+ * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
++ * Hash Tree Directory indexing (c)
++ * Daniel Phillips, 2001
++ * Hash Tree Directory indexing porting
++ * Christopher Li, 2002
+ */
+
+ #include <linux/fs.h>
+@@ -38,6 +42,436 @@
+ #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+ #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
+
++void ext3_add_compat_feature (struct super_block *sb, unsigned feature)
++{
++ if (!EXT3_HAS_COMPAT_FEATURE(sb, feature))
++ {
++ lock_super(sb);
++ ext3_update_dynamic_rev(sb);
++ EXT3_SET_COMPAT_FEATURE(sb, feature);
++ ext3_write_super(sb);
++ unlock_super(sb);
++ }
++}
++
++static struct buffer_head *ext3_append (handle_t *handle,
++ struct inode *inode,
++ u32 *block, int *err)
++{
++ struct buffer_head *bh;
++ *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
++ if((bh = ext3_bread (handle,inode, *block, 1, err))) {
++ inode->i_size += inode->i_sb->s_blocksize;
++ ext3_journal_get_write_access(handle,bh);
++ }
++ return bh;
++}
++
++#ifndef assert
++#define assert(test) do if (!(test)) BUG(); while (0)
++#endif
++
++#ifndef swap
++#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
++#endif
++
++typedef struct { u32 v; } le_u32;
++typedef struct { u16 v; } le_u16;
++
++#define dxtrace_on(command) command
++#define dxtrace_off(command)
++#define dxtrace dxtrace_off
++
++struct fake_dirent
++{
++ /*le*/u32 inode;
++ /*le*/u16 rec_len;
++ u8 name_len;
++ u8 file_type;
++};
++
++struct dx_countlimit
++{
++ le_u16 limit;
++ le_u16 count;
++};
++
++struct dx_entry
++{
++ le_u32 hash;
++ le_u32 block;
++};
++
++/*
++ * dx_root_info is laid out so that if it should somehow get overlaid by a
++ * dirent the two low bits of the hash version will be zero. Therefore, the
++ * hash version mod 4 should never be 0. Sincerely, the paranoia department.
++ */
++
++struct dx_root
++{
++ struct fake_dirent fake1;
++ char dot1[4];
++ struct fake_dirent fake2;
++ char dot2[4];
++ struct dx_root_info
++ {
++ le_u32 reserved_zero;
++ u8 hash_version; /* 0 now, 1 at release */
++ u8 info_length; /* 8 */
++ u8 indirect_levels;
++ u8 unused_flags;
++ }
++ info;
++ struct dx_entry entries[0];
++};
++
++struct dx_node
++{
++ struct fake_dirent fake;
++ struct dx_entry entries[0];
++};
++
++
++struct dx_frame
++{
++ struct buffer_head *bh;
++ struct dx_entry *entries;
++ struct dx_entry *at;
++};
++
++struct dx_map_entry
++{
++ u32 hash;
++ u32 offs;
++};
++
++typedef struct ext3_dir_entry_2 ext3_dirent;
++/* dx_static exists purely to suppress compiler warnings */
++static inline unsigned dx_get_block (struct dx_entry *entry);
++static void dx_set_block (struct dx_entry *entry, unsigned value);
++static inline unsigned dx_get_hash (struct dx_entry *entry);
++static void dx_set_hash (struct dx_entry *entry, unsigned value);
++static unsigned dx_get_count (struct dx_entry *entries);
++static unsigned dx_get_limit (struct dx_entry *entries);
++static void dx_set_count (struct dx_entry *entries, unsigned value);
++static void dx_set_limit (struct dx_entry *entries, unsigned value);
++static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
++static unsigned dx_node_limit (struct inode *dir);
++static unsigned dx_hack_hash (const u8 *name, int len);
++static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame);
++static void dx_release (struct dx_frame *frames);
++static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]);
++static void dx_sort_map(struct dx_map_entry *map, unsigned count);
++static ext3_dirent *dx_copy_dirents (char *from, char *to,
++ struct dx_map_entry *map, int count);
++static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
++
++
++#ifdef CONFIG_EXT3_INDEX
++/*
++ * Future: use high four bits of block for coalesce-on-delete flags
++ * Mask them off for now.
++ */
++
++static inline unsigned dx_get_block (struct dx_entry *entry)
++{
++ return le32_to_cpu(entry->block.v) & 0x0fffffff;
++}
++
++static inline void dx_set_block (struct dx_entry *entry, unsigned value)
++{
++ entry->block.v = cpu_to_le32(value);
++}
++
++static inline unsigned dx_get_hash (struct dx_entry *entry)
++{
++ return le32_to_cpu(entry->hash.v);
++}
++
++static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
++{
++ entry->hash.v = cpu_to_le32(value);
++}
++
++static inline unsigned dx_get_count (struct dx_entry *entries)
++{
++ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
++}
++
++static inline unsigned dx_get_limit (struct dx_entry *entries)
++{
++ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
++}
++
++static inline void dx_set_count (struct dx_entry *entries, unsigned value)
++{
++ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
++}
++
++static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
++{
++ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
++}
++
++static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
++{
++ unsigned entry_space = dir->i_sb->s_blocksize - 24 - infosize;
++ return 0? 20: entry_space / sizeof(struct dx_entry);
++}
++
++static inline unsigned dx_node_limit (struct inode *dir)
++{
++ unsigned entry_space = dir->i_sb->s_blocksize - sizeof(struct fake_dirent);
++ return 0? 22: entry_space / sizeof(struct dx_entry);
++}
++
++/* Hash function - not bad, but still looking for an ideal default */
++
++static unsigned dx_hack_hash (const u8 *name, int len)
++{
++ u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
++ while (len--)
++ {
++ u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
++ if (hash & 0x80000000) hash -= 0x7fffffff;
++ hash1 = hash0;
++ hash0 = hash;
++ }
++ return 80; /* FIXME: for test only */
++ return hash0;
++}
++
++#define dx_hash(s,n) (dx_hack_hash(s,n) << 1)
++
++/*
++ * Debug
++ */
++static void dx_show_index (char * label, struct dx_entry *entries)
++{
++ int i, n = dx_get_count (entries);
++ printk("%s index ", label);
++ for (i = 0; i < n; i++)
++ {
++ printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
++ }
++ printk("\n");
++}
++
++struct stats
++{
++ unsigned names;
++ unsigned space;
++ unsigned bcount;
++};
++
++static struct stats dx_show_leaf (ext3_dirent *de, int size, int show_names)
++{
++ unsigned names = 0, space = 0;
++ char *base = (char *) de;
++ printk("names: ");
++ while ((char *) de < base + size)
++ {
++ if (de->inode)
++ {
++ if (show_names)
++ {
++ int len = de->name_len;
++ char *name = de->name;
++ while (len--) printk("%c", *name++);
++ printk(":%x.%u ", dx_hash (de->name, de->name_len), ((char *) de - base));
++ }
++ space += EXT3_DIR_REC_LEN(de->name_len);
++ names++;
++ }
++ de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
++ }
++ printk("(%i)\n", names);
++ return (struct stats) { names, space, 1 };
++}
++
++struct stats dx_show_entries (struct inode *dir, struct dx_entry *entries, int levels)
++{
++ unsigned blocksize = dir->i_sb->s_blocksize;
++ unsigned count = dx_get_count (entries), names = 0, space = 0, i;
++ unsigned bcount = 0;
++ struct buffer_head *bh;
++ int err;
++ printk("%i indexed blocks...\n", count);
++ for (i = 0; i < count; i++, entries++)
++ {
++ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
++ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
++ struct stats stats;
++ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
++ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
++ stats = levels?
++ dx_show_entries (dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
++ dx_show_leaf ((ext3_dirent *) bh->b_data, blocksize, 0);
++ names += stats.names;
++ space += stats.space;
++ bcount += stats.bcount;
++ brelse (bh);
++ }
++ if (bcount)
++ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
++ names, space/bcount,(space/bcount)*100/blocksize);
++ return (struct stats) { names, space, bcount};
++}
++
++static void dx_show_buckets (struct inode *dir)
++{
++ struct buffer_head *bh;
++ struct dx_root *root;
++ int err;
++ if (!(bh = ext3_bread (NULL,dir, 0, 0,&err))) return;
++ root = (struct dx_root *) bh->b_data;
++ dx_show_entries (dir, root->entries, root->info.indirect_levels);
++ brelse (bh);
++}
++
++ssize_t hack_show_dir (struct file * filp, void * dirent, filldir_t filldir)
++{
++ if (is_dx (filp->f_dentry->d_inode) && !filp->f_pos)
++ dx_show_buckets (filp->f_dentry->d_inode);
++ return ext3_readdir(filp,dirent,filldir);
++}
++
++/*
++ * Probe for a directory leaf block to search
++ */
++
++static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame)
++{
++ unsigned count, indirect;
++ struct dx_entry *at, *entries, *p, *q, *m;
++ struct dx_root *root;
++ struct buffer_head *bh;
++ int err;
++ if (!(bh = ext3_bread (NULL,dir, 0, 0,&err)))
++ goto fail;
++ root = (struct dx_root *) bh->b_data;
++ if (root->info.hash_version > 0 || root->info.unused_flags & 1)
++ goto fail;
++ if ((indirect = root->info.indirect_levels) > 1)
++ goto fail;
++ entries = (struct dx_entry *) (((char *) &root->info) + root->info.info_length);
++ assert (dx_get_limit(entries) == dx_root_limit(dir, root->info.info_length));
++ dxtrace (printk("Look up %x", hash));
++ while (1)
++ {
++ count = dx_get_count(entries);
++ assert (count && count <= dx_get_limit(entries));
++ p = entries + 1;
++ q = entries + count - 1;
++ while (p <= q)
++ {
++ m = p + (q - p)/2;
++ dxtrace(printk("."));
++ if (dx_get_hash(m) > hash)
++ q = m - 1;
++ else
++ p = m + 1;
++ }
++
++ if (0) // linear search cross check
++ {
++ unsigned n = count - 1;
++ at = entries;
++ while (n--)
++ {
++ dxtrace(printk(","));
++ if (dx_get_hash(++at) > hash)
++ {
++ at--;
++ break;
++ }
++ }
++ assert (at == p - 1);
++ }
++
++ at = p - 1;
++ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
++ frame->bh = bh;
++ frame->entries = entries;
++ frame->at = at;
++ if (!indirect--) return frame;
++ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0,&err)))
++ goto fail2;
++ at = entries = ((struct dx_node *) bh->b_data)->entries;
++ assert (dx_get_limit(entries) == dx_node_limit (dir));
++ frame++;
++ }
++fail2:
++ brelse(frame->bh);
++fail:
++ return NULL;
++}
++
++static void dx_release (struct dx_frame *frames)
++{
++ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
++ brelse (frames[1].bh);
++ brelse (frames[0].bh);
++}
++
++/*
++ * Directory block splitting, compacting
++ */
++
++static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[])
++{
++ int count = 0;
++ char *base = (char *) de;
++ while ((char *) de < base + size)
++ {
++ map[count].hash = dx_hash (de->name, de->name_len);
++ map[count].offs = (u32) ((char *) de - base);
++ de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
++ count++;
++ }
++ return count;
++}
++
++static void dx_sort_map (struct dx_map_entry *map, unsigned count)
++{
++ struct dx_map_entry *p, *q, *top = map + count - 1;
++ int more;
++ /* Combsort until bubble sort doesn't suck */
++ while (count > 2)
++ {
++ count = count*10/13;
++ if (count - 9 < 2) /* 9, 10 -> 11 */
++ count = 11;
++ for (p = top, q = p - count; q >= map; p--, q--)
++ if (p->hash < q->hash)
++ swap(*p, *q);
++ }
++ /* Garden variety bubble sort */
++ do {
++ more = 0;
++ q = top;
++ while (q-- > map)
++ {
++ if (q[1].hash >= q[0].hash)
++ continue;
++ swap(*(q+1), *q);
++ more = 1;
++ }
++ } while(more);
++}
++
++static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block)
++{
++ struct dx_entry *entries = frame->entries, *at = frame->at;
++ assert (dx_get_count (entries) < dx_get_limit (entries));
++ memmove (at + 2, at+1, (char *) (entries + dx_get_count(entries)) - (char *) (at));
++ dx_set_hash(at + 1, hash);
++ dx_set_block(at + 1, block);
++ dx_set_count(entries, dx_get_count(entries) + 1);
++}
++#endif
++
+ /*
+ * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
+ *
+@@ -95,6 +529,15 @@
+ }
+
+ /*
++ * p is at least 6 bytes before the end of page
++ */
++static inline ext3_dirent *ext3_next_entry(ext3_dirent *p)
++{
++ return (ext3_dirent *)((char*)p + le16_to_cpu(p->rec_len));
++}
++
++
++/*
+ * ext3_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+@@ -105,6 +548,8 @@
+ * The returned buffer_head has ->b_count elevated. The caller is expected
+ * to brelse() it when appropriate.
+ */
++
++
+ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+ struct ext3_dir_entry_2 ** res_dir)
+ {
+@@ -119,10 +564,76 @@
+ int num = 0;
+ int nblocks, i, err;
+ struct inode *dir = dentry->d_parent->d_inode;
++ int namelen;
++ const u8 *name;
++ unsigned blocksize;
++ ext3_dirent *de, *top;
+
+ *res_dir = NULL;
+ sb = dir->i_sb;
++ blocksize = sb->s_blocksize;
++ namelen = dentry->d_name.len;
++ name = dentry->d_name.name;
++ if (namelen > EXT3_NAME_LEN)
++ return NULL;
++ if (ext3_dx && is_dx(dir)) {
++ u32 hash = dx_hash (name, namelen);
++ struct dx_frame frames[2], *frame;
++ if (!(frame = dx_probe (dir, hash, frames)))
++ return NULL;
++dxnext:
++ block = dx_get_block(frame->at);
++ if (!(bh = ext3_bread (NULL,dir, block, 0, &err)))
++ goto dxfail;
++ de = (struct ext3_dir_entry_2 *) bh->b_data;
++ top = (struct ext3_dir_entry_2 *) ((char *) de + blocksize
++ - EXT3_DIR_REC_LEN(0));
++ for (; de < top; de = ext3_next_entry(de))
++ if (ext3_match (namelen, name, de)) {
++ if (!ext3_check_dir_entry("ext3_find_entry",
++ dir, de, bh,
++ (block<<EXT3_BLOCK_SIZE_BITS(sb))
++ +((char *)de - bh->b_data))) {
++ brelse (bh);
++ goto dxfail;
++ }
++ *res_dir = de;
++ goto dxfound;
++ }
++ brelse (bh);
++ /* Same hash continues in next block? Search on. */
++ if (++(frame->at) == frame->entries + dx_get_count(frame->entries))
++ {
++ struct buffer_head *bh2;
++ if (frame == frames)
++ goto dxfail;
++ if (++(frames->at) == frames->entries + dx_get_count(frames->entries))
++ goto dxfail;
++ /* should omit read if not continued */
++ if (!(bh2 = ext3_bread (NULL, dir,
++ dx_get_block(frames->at),
++ 0, &err)))
++ goto dxfail;
++ brelse (frame->bh);
++ frame->bh = bh2;
++ frame->at = frame->entries = ((struct dx_node *) bh2->b_data)->entries;
++ /* Subtle: the 0th entry has the count, find the hash in frame above */
++ if ((dx_get_hash(frames->at) & -2) == hash)
++ goto dxnext;
++ goto dxfail;
++ }
++ if ((dx_get_hash(frame->at) & -2) == hash)
++ goto dxnext;
++dxfail:
++ dxtrace(printk("%s not found\n", name));
++ dx_release (frames);
++ return NULL;
++dxfound:
++ dx_release (frames);
++ return bh;
+
++ }
++
+ nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+ start = dir->u.ext3_i.i_dir_start_lookup;
+ if (start >= nblocks)
+@@ -237,6 +748,88 @@
+ de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+ }
+
++static ext3_dirent *
++dx_copy_dirents (char *from, char *to, struct dx_map_entry *map, int count)
++{
++ unsigned rec_len = 0;
++
++ while (count--) {
++ ext3_dirent *de = (ext3_dirent *) (from + map->offs);
++ rec_len = EXT3_DIR_REC_LEN(de->name_len);
++ memcpy (to, de, rec_len);
++ ((ext3_dirent *) to)->rec_len = rec_len;
++ to += rec_len;
++ map++;
++ }
++ return (ext3_dirent *) (to - rec_len);
++}
++
++#ifdef CONFIG_EXT3_INDEX
++static ext3_dirent *do_split(handle_t *handle, struct inode *dir,
++ struct buffer_head **bh,struct dx_frame *frame,
++ u32 hash, int *error)
++{
++ unsigned blocksize = dir->i_sb->s_blocksize;
++ unsigned count, continued;
++ struct buffer_head *bh2;
++ u32 newblock;
++ unsigned MAX_DX_MAP = PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1;
++ u32 hash2;
++ struct dx_map_entry map[MAX_DX_MAP];
++ char *data1 = (*bh)->b_data, *data2, *data3;
++ unsigned split;
++ ext3_dirent *de, *de2;
++
++ bh2 = ext3_append (handle, dir, &newblock, error);
++ if (!(bh2))
++ {
++ brelse(*bh);
++ *bh = NULL;
++ return (ext3_dirent *)bh2;
++ }
++
++ BUFFER_TRACE(*bh, "get_write_access");
++ ext3_journal_get_write_access(handle, *bh);
++ BUFFER_TRACE(frame->bh, "get_write_access");
++ ext3_journal_get_write_access(handle, frame->bh);
++
++ data2 = bh2->b_data;
++
++ count = dx_make_map ((ext3_dirent *) data1, blocksize, map);
++ split = count/2; // need to adjust to actual middle
++ dx_sort_map (map, count);
++ hash2 = map[split].hash;
++ continued = hash2 == map[split - 1].hash;
++ dxtrace(printk("Split block %i at %x, %i/%i\n",
++ dx_get_block(frame->at), hash2, split, count-split));
++
++ /* Fancy dance to stay within two buffers */
++ de2 = dx_copy_dirents (data1, data2, map + split, count - split);
++ data3 = (char *) de2 + de2->rec_len;
++ de = dx_copy_dirents (data1, data3, map, split);
++ memcpy(data1, data3, (char *) de + de->rec_len - data3);
++ de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
++ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
++ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
++ dxtrace_on(dx_show_leaf ((ext3_dirent *) data1, blocksize, 1));
++ dxtrace_on(dx_show_leaf ((ext3_dirent *) data2, blocksize, 1));
++
++ /* Which block gets the new entry? */
++ if (hash >= hash2)
++ {
++ swap(*bh, bh2);
++ de = de2;
++ }
++ dx_insert_block (frame, hash2 + continued, newblock);
++ ext3_journal_dirty_metadata (handle, bh2);
++ brelse (bh2);
++ ext3_journal_dirty_metadata (handle, frame->bh);
++ dxtrace_on(dx_show_index ("frame", frame->entries));
++ return de;
++}
++#endif
++
++
+ /*
+ * ext3_add_entry()
+ *
+@@ -251,6 +844,7 @@
+ /*
+ * AKPM: the journalling code here looks wrong on the error paths
+ */
++
+ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
+ struct inode *inode)
+ {
+@@ -258,117 +852,284 @@
+ const char *name = dentry->d_name.name;
+ int namelen = dentry->d_name.len;
+ unsigned long offset;
+- unsigned short rec_len;
+ struct buffer_head * bh;
+ struct ext3_dir_entry_2 * de, * de1;
+- struct super_block * sb;
++ struct super_block * sb = dir->i_sb;
+ int retval;
++ unsigned short reclen = EXT3_DIR_REC_LEN(namelen);
+
+- sb = dir->i_sb;
++ unsigned blocksize = sb->s_blocksize;
++ unsigned nlen, rlen;
++ u32 block;
++ char *top;
+
+ if (!namelen)
+ return -EINVAL;
+- bh = ext3_bread (handle, dir, 0, 0, &retval);
+- if (!bh)
+- return retval;
+- rec_len = EXT3_DIR_REC_LEN(namelen);
+- offset = 0;
+- de = (struct ext3_dir_entry_2 *) bh->b_data;
+- while (1) {
+- if ((char *)de >= sb->s_blocksize + bh->b_data) {
+- brelse (bh);
+- bh = NULL;
+- bh = ext3_bread (handle, dir,
+- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
+- if (!bh)
+- return retval;
+- if (dir->i_size <= offset) {
+- if (dir->i_size == 0) {
+- brelse(bh);
+- return -ENOENT;
++ if (ext3_dx && is_dx(dir))
++ {
++ struct dx_frame frames[2], *frame;
++ struct dx_entry *entries, *at;
++ u32 hash;
++ char *data1;
++
++ hash = dx_hash (name, namelen);
++ frame = dx_probe (dir, hash, frames); // do something if null
++ entries = frame->entries;
++ at = frame->at;
++
++ if (!(bh = ext3_bread (handle,dir, dx_get_block(frame->at), 0,&retval)))
++ goto dxfail1;
++
++ BUFFER_TRACE(bh, "get_write_access");
++ ext3_journal_get_write_access(handle, bh);
++
++ data1 = bh->b_data;
++ de = (ext3_dirent *) data1;
++ top = data1 + (0? 200: blocksize);
++ while ((char *) de < top)
++ {
++ /* FIXME: check EEXIST and dir */
++ nlen = EXT3_DIR_REC_LEN(de->name_len);
++ rlen = le16_to_cpu(de->rec_len);
++ if ((de->inode? rlen - nlen: rlen) >= reclen)
++ goto dx_add;
++ de = (ext3_dirent *) ((char *) de + rlen);
++ }
++ /* Block full, should compress but for now just split */
++ dxtrace_on(printk("using %u of %u node entries\n",
++ dx_get_count(entries), dx_get_limit(entries)));
++ /* Need to split index? */
++ if (dx_get_count(entries) == dx_get_limit(entries))
++ {
++ u32 newblock;
++ unsigned icount = dx_get_count(entries);
++ char *idata2;
++ int levels = frame - frames;
++ struct dx_entry *entries2;
++ struct buffer_head *bh2;
++ if (levels && dx_get_count(frames->entries) == dx_get_limit(frames->entries))
++ goto dxfull;
++ bh2 = ext3_append (handle, dir, &newblock, &retval);
++ if (!(bh2))
++ goto dxfail2;
++ idata2 = bh2->b_data;
++ entries2 = ((struct dx_node *) idata2)->entries;
++ ((struct dx_node *) idata2)->fake.rec_len = cpu_to_le16(blocksize);
++ /* fake.inode already 0 */
++ /* Seems that is not true. We still need to set inode = 0 -Chrisl*/
++ ((struct dx_node *) idata2)->fake.inode = 0;
++ BUFFER_TRACE(frame->bh, "get_write_access");
++ ext3_journal_get_write_access(handle, frame->bh);
++ if (levels)
++ {
++ unsigned icount1 = icount/2, icount2 = icount - icount1;
++ unsigned hash2 = dx_get_hash(entries + icount1);
++ dxtrace_on(printk("Split index %i/%i\n", icount1, icount2));
++
++ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
++ ext3_journal_get_write_access(handle, frames[0].bh);
++
++ memcpy ((char *) entries2, (char *) (entries + icount1),
++ icount2 * sizeof(struct dx_entry));
++ dx_set_count (entries, icount1);
++ dx_set_count (entries2, icount2);
++ dx_set_limit (entries2, dx_node_limit(dir));
++
++ /* Which index block gets the new entry? */
++ if (at - entries > icount1) {
++ frame->at = at = at - entries - icount1 + entries2;
++ frame->entries = entries = entries2;
++ swap(frame->bh, bh2);
+ }
+-
+- ext3_debug ("creating next block\n");
+-
+- BUFFER_TRACE(bh, "get_write_access");
+- ext3_journal_get_write_access(handle, bh);
+- de = (struct ext3_dir_entry_2 *) bh->b_data;
+- de->inode = 0;
+- de->rec_len = le16_to_cpu(sb->s_blocksize);
+- dir->u.ext3_i.i_disksize =
+- dir->i_size = offset + sb->s_blocksize;
+- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+- ext3_mark_inode_dirty(handle, dir);
++ dx_insert_block (frames + 0, hash2, newblock);
++ dxtrace_on(dx_show_index ("node", frames[1].entries));
++ dxtrace_on(dx_show_index ("node",
++ ((struct dx_node *) bh2->b_data)->entries));
++ ext3_journal_dirty_metadata(handle, bh2);
++ brelse (bh2);
+ } else {
+-
+- ext3_debug ("skipping to next block\n");
+-
+- de = (struct ext3_dir_entry_2 *) bh->b_data;
++ dxtrace_on(printk("Creating second level index...\n"));
++ memcpy((char *) entries2, (char *) entries,
++ icount * sizeof(struct dx_entry));
++ dx_set_limit(entries2, dx_node_limit(dir));
++
++ /* Set up root */
++ dx_set_count(entries, 1);
++ dx_set_block(entries + 0, newblock);
++ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
++
++ /* Add new access path frame */
++ frame = frames + 1;
++ frame->at = at = at - entries + entries2;
++ frame->entries = entries = entries2;
++ frame->bh = bh2;
++ ext3_journal_get_write_access(handle, frame->bh);
+ }
++ ext3_journal_dirty_metadata(handle, frames[0].bh);
+ }
+- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
+- offset)) {
+- brelse (bh);
+- return -ENOENT;
+- }
+- if (ext3_match (namelen, name, de)) {
++ de = do_split(handle, dir, &bh, frame, hash, &retval);
++ dx_release (frames);
++ if (!(de))
++ goto fail;
++ nlen = EXT3_DIR_REC_LEN(de->name_len);
++ rlen = le16_to_cpu(de->rec_len);
++ goto add;
++
++dx_add:
++ dx_release (frames);
++ goto add;
++
++dxfull:
++ dxtrace_on(printk("Directory index full!\n"));
++ retval = -ENOSPC;
++dxfail2:
++ brelse(bh);
++dxfail1:
++ dx_release (frames);
++ goto fail1;
++ }
++ block = offset = 0;
++ while (offset < dir->i_size) {
++ bh = ext3_bread (handle, dir, block, 0, &retval);
++ if(!bh)
++ return retval;
++ de = (struct ext3_dir_entry_2 *) bh->b_data;
++ top = bh->b_data+blocksize-reclen;
++ while ((char *) de <= top) {
++
++ if (!ext3_check_dir_entry ("ext3_add_entry", dir, de,
++ bh,offset)) {
++ brelse (bh);
++ return -ENOENT;
++ }
++ if (ext3_match (namelen, name, de)) {
+ brelse (bh);
+ return -EEXIST;
+- }
+- if ((le32_to_cpu(de->inode) == 0 &&
+- le16_to_cpu(de->rec_len) >= rec_len) ||
+- (le16_to_cpu(de->rec_len) >=
+- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
+- BUFFER_TRACE(bh, "get_write_access");
+- ext3_journal_get_write_access(handle, bh);
+- /* By now the buffer is marked for journaling */
+- offset += le16_to_cpu(de->rec_len);
+- if (le32_to_cpu(de->inode)) {
+- de1 = (struct ext3_dir_entry_2 *) ((char *) de +
+- EXT3_DIR_REC_LEN(de->name_len));
+- de1->rec_len =
+- cpu_to_le16(le16_to_cpu(de->rec_len) -
+- EXT3_DIR_REC_LEN(de->name_len));
+- de->rec_len = cpu_to_le16(
+- EXT3_DIR_REC_LEN(de->name_len));
+- de = de1;
+ }
+- de->file_type = EXT3_FT_UNKNOWN;
+- if (inode) {
+- de->inode = cpu_to_le32(inode->i_ino);
+- ext3_set_de_type(dir->i_sb, de, inode->i_mode);
+- } else
+- de->inode = 0;
+- de->name_len = namelen;
+- memcpy (de->name, name, namelen);
+- /*
+- * XXX shouldn't update any times until successful
+- * completion of syscall, but too many callers depend
+- * on this.
+- *
+- * XXX similarly, too many callers depend on
+- * ext3_new_inode() setting the times, but error
+- * recovery deletes the inode, so the worst that can
+- * happen is that the times are slightly out of date
+- * and/or different from the directory change time.
+- */
+- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+- ext3_mark_inode_dirty(handle, dir);
+- dir->i_version = ++event;
+- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+- ext3_journal_dirty_metadata(handle, bh);
++ nlen = EXT3_DIR_REC_LEN(de->name_len);
++ rlen = le16_to_cpu(de->rec_len);
++ if ((de->inode? rlen - nlen: rlen) >= reclen)
++ goto add;
++ de = (struct ext3_dir_entry_2 *) ((char *) de + rlen);
++ offset += rlen;
++ }
++ if (ext3_dx && dir->i_size==blocksize && test_opt(sb, INDEX))
++ goto dx_make_index;
++ brelse(bh);
++ }
++ bh = ext3_append(handle, dir, &block, &retval);
++ if (!bh)
++ return retval;
++ de = (struct ext3_dir_entry_2 *) bh->b_data;
++ de->inode = 0;
++ de->rec_len = cpu_to_le16(rlen = blocksize);
++ nlen = 0;
++ goto add;
++
++add:
++ BUFFER_TRACE(bh, "get_write_access");
++ ext3_journal_get_write_access(handle, bh);
++ /* By now the buffer is marked for journaling */
++ if (de->inode) {
++ de1 = (struct ext3_dir_entry_2 *) ((char *) de + nlen);
++ de1->rec_len = cpu_to_le16(rlen - nlen);
++ de->rec_len = cpu_to_le16(nlen);
++ de = de1;
++ }
++ de->file_type = EXT3_FT_UNKNOWN;
++ if (inode) {
++ de->inode = cpu_to_le32(inode->i_ino);
++ ext3_set_de_type(dir->i_sb, de, inode->i_mode);
++ } else
++ de->inode = 0;
++ de->name_len = namelen;
++ memcpy (de->name, name, namelen);
++ /*
++ * XXX shouldn't update any times until successful
++ * completion of syscall, but too many callers depend
++ * on this.
++ *
++ * XXX similarly, too many callers depend on
++ * ext3_new_inode() setting the times, but error
++ * recovery deletes the inode, so the worst that can
++ * happen is that the times are slightly out of date
++ * and/or different from the directory change time.
++ */
++ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
++ /* dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; */
++ ext3_mark_inode_dirty(handle, dir);
++ dir->i_version = ++event;
++ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
++ ext3_journal_dirty_metadata(handle, bh);
++ brelse(bh);
++ return 0;
++
++dx_make_index:
++ {
++ struct buffer_head *bh2;
++ struct dx_root *root;
++ struct dx_frame frames[2], *frame;
++ struct dx_entry *entries;
++ struct ext3_dir_entry_2 *de2;
++ char *data1;
++ unsigned len;
++ u32 hash;
++
++ dxtrace_on(printk("Creating index\n"));
++ ext3_journal_get_write_access(handle, bh);
++ root = (struct dx_root *) bh->b_data;
++
++ ext3_add_compat_feature (sb, EXT3_FEATURE_COMPAT_DIR_INDEX);
++ dir->u.ext3_i.i_flags |= EXT3_INDEX_FL;
++ bh2 = ext3_append (handle, dir, &block, &retval);
++ if (!(bh2))
++ {
+ brelse(bh);
+- return 0;
++ return retval;
+ }
+- offset += le16_to_cpu(de->rec_len);
+- de = (struct ext3_dir_entry_2 *)
+- ((char *) de + le16_to_cpu(de->rec_len));
++ data1 = bh2->b_data;
++
++ /* The 0th block becomes the root, move the dirents out */
++ de = (ext3_dirent *) &root->info;
++ len = ((char *) root) + blocksize - (char *) de;
++ memcpy (data1, de, len);
++ de = (ext3_dirent *) data1;
++ top = data1 + len;
++ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top)
++ de = de2;
++ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
++ /* Initialize the root; the dot dirents already exist */
++ de = (ext3_dirent *) (&root->fake2);
++ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
++ memset (&root->info, 0, sizeof(root->info));
++ root->info.info_length = sizeof(root->info);
++ entries = root->entries;
++ dx_set_block (entries, 1);
++ dx_set_count (entries, 1);
++ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
++
++ /* Initialize as for dx_probe */
++ hash = dx_hash (name, namelen);
++ frame = frames;
++ frame->entries = entries;
++ frame->at = entries;
++ frame->bh = bh;
++ bh = bh2;
++ de = do_split(handle,dir, &bh, frame, hash, &retval);
++ dx_release (frames);
++ if (!(de))
++ return retval;
++ nlen = EXT3_DIR_REC_LEN(de->name_len);
++ rlen = le16_to_cpu(de->rec_len);
++ goto add;
+ }
+- brelse (bh);
+- return -ENOSPC;
++fail1:
++ return retval;
++fail:
++ return -ENOENT;
+ }
+
++
+ /*
+ * ext3_delete_entry deletes a directory entry by merging it with the
+ * previous entry
+@@ -451,7 +1212,8 @@
+ struct inode * inode;
+ int err;
+
+- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++ EXT3_INDEX_EXTRA_TRANS_BLOCKS+3);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+@@ -478,7 +1240,8 @@
+ struct inode *inode;
+ int err;
+
+- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+@@ -507,7 +1270,8 @@
+ if (dir->i_nlink >= EXT3_LINK_MAX)
+ return -EMLINK;
+
+- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+@@ -832,7 +1596,7 @@
+ ext3_mark_inode_dirty(handle, inode);
+ dir->i_nlink--;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++ // dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+ ext3_mark_inode_dirty(handle, dir);
+
+ end_rmdir:
+@@ -878,7 +1642,7 @@
+ if (retval)
+ goto end_unlink;
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++ // dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+ ext3_mark_inode_dirty(handle, dir);
+ inode->i_nlink--;
+ if (!inode->i_nlink)
+@@ -904,7 +1668,8 @@
+ if (l > dir->i_sb->s_blocksize)
+ return -ENAMETOOLONG;
+
+- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+@@ -959,7 +1724,8 @@
+ if (inode->i_nlink >= EXT3_LINK_MAX)
+ return -EMLINK;
+
+- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++ EXT3_INDEX_EXTRA_TRANS_BLOCKS);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+@@ -995,7 +1761,8 @@
+
+ old_bh = new_bh = dir_bh = NULL;
+
+- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
++ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
++ EXT3_INDEX_EXTRA_TRANS_BLOCKS+ 2);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+@@ -1077,7 +1844,7 @@
+ new_inode->i_ctime = CURRENT_TIME;
+ }
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++ // old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+ if (dir_bh) {
+ BUFFER_TRACE(dir_bh, "get_write_access");
+ ext3_journal_get_write_access(handle, dir_bh);
+@@ -1089,7 +1856,7 @@
+ new_inode->i_nlink--;
+ } else {
+ new_dir->i_nlink++;
+- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++ // new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+ ext3_mark_inode_dirty(handle, new_dir);
+ }
+ }
+--- ./include/linux/ext3_fs.h 2002/03/05 06:18:59 2.1
++++ ./include/linux/ext3_fs.h 2002/03/05 06:26:56
+@@ -339,6 +339,7 @@
+ #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */
+ #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
++#define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+@@ -575,6 +576,26 @@
+ #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
+ #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
+ ~EXT3_DIR_ROUND)
++/*
++ * Hash Tree Directory indexing
++ * (c) Daniel Phillips, 2001
++ */
++
++#define CONFIG_EXT3_INDEX
++
++#ifdef CONFIG_EXT3_INDEX
++ enum {ext3_dx = 1};
++ #define dx_static static
++ #define is_dx(dir) ((dir)->u.ext3_i.i_flags & EXT3_INDEX_FL)
++#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
++#else
++ enum {ext3_dx = 0};
++ #define dx_static
++ #define is_dx(dir) 0
++#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
++#endif
+
+ #ifdef __KERNEL__
+ /*
+--- ./include/linux/ext3_jbd.h 2002/03/05 06:18:59 2.1
++++ ./include/linux/ext3_jbd.h 2002/03/05 06:33:54
+@@ -63,6 +63,8 @@
+
+ #define EXT3_RESERVE_TRANS_BLOCKS 12
+
++#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
++
+ int
+ ext3_mark_iloc_dirty(handle_t *handle,
+ struct inode *inode,