Whamcloud - gitweb
- merge 0.7rc1 from b_devel to HEAD (20030612 merge point)
[fs/lustre-release.git] / lustre / extN / htree-ext3-2.4.18.diff
diff --git a/lustre/extN/htree-ext3-2.4.18.diff b/lustre/extN/htree-ext3-2.4.18.diff
deleted file mode 100644 (file)
index 4251251..0000000
+++ /dev/null
@@ -1,1201 +0,0 @@
---- ./fs/ext3/super.c  2002/03/05 06:18:59     2.1
-+++ ./fs/ext3/super.c  2002/03/05 06:26:56
-@@ -529,6 +529,12 @@
-                                      "EXT3 Check option not supported\n");
- #endif
-               }
-+              else if (!strcmp (this_char, "index"))
-+#ifdef CONFIG_EXT3_INDEX
-+                      set_opt (*mount_options, INDEX);
-+#else
-+                      printk("EXT3 index option not supported\n");
-+#endif
-               else if (!strcmp (this_char, "debug"))
-                       set_opt (*mount_options, DEBUG);
-               else if (!strcmp (this_char, "errors")) {
-@@ -702,6 +708,12 @@ static int ext3_setup_super(struct super
-       es->s_mtime = cpu_to_le32(CURRENT_TIME);
-       ext3_update_dynamic_rev(sb);
-       EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+
-+      if (test_opt(sb, INDEX))
-+              EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX);
-+      else if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
-+              set_opt (EXT3_SB(sb)->s_mount_opt, INDEX);
-+
-       ext3_commit_super (sb, es, 1);
-       if (test_opt (sb, DEBUG))
-               printk (KERN_INFO
---- ./fs/ext3/namei.c  2002/03/05 06:18:59     2.1
-+++ ./fs/ext3/namei.c  2002/03/06 00:13:18
-@@ -16,6 +16,10 @@
-  *        David S. Miller (davem@caip.rutgers.edu), 1995
-  *  Directory entry file type support and forward compatibility hooks
-  *    for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
-+ *  Hash Tree Directory indexing (c)
-+ *    Daniel Phillips, 2001
-+ *  Hash Tree Directory indexing porting
-+ *    Christopher Li, 2002
-  */
- #include <linux/fs.h>
-@@ -33,7 +33,7 @@
- #include <linux/string.h>
- #include <linux/locks.h>
- #include <linux/quotaops.h>
--
-+#include <linux/slab.h>
- /*
-  * define how far ahead to read directories while searching them.
-@@ -38,6 +42,437 @@
- #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
- #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
-+static struct buffer_head *ext3_append(handle_t *handle,
-+                                      struct inode *inode,
-+                                      u32 *block, int *err)
-+{
-+      struct buffer_head *bh;
-+
-+      *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-+
-+      if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
-+              inode->i_size += inode->i_sb->s_blocksize;
-+              EXT3_I(inode)->i_disksize = inode->i_size;
-+              ext3_journal_get_write_access(handle,bh);
-+      }
-+      return bh;
-+}
-+
-+#ifndef assert
-+#define assert(test) J_ASSERT(test)
-+#endif
-+
-+#ifndef swap
-+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-+#endif
-+
-+typedef struct { u32 v; } le_u32;
-+typedef struct { u16 v; } le_u16;
-+
-+#define dxtrace_on(command) command
-+#define dxtrace_off(command)
-+
-+struct fake_dirent
-+{
-+      /*le*/u32 inode;
-+      /*le*/u16 rec_len;
-+      u8 name_len;
-+      u8 file_type;
-+};
-+
-+struct dx_countlimit
-+{
-+      le_u16 limit;
-+      le_u16 count;
-+};
-+
-+struct dx_entry
-+{
-+      le_u32 hash;
-+      le_u32 block;
-+};
-+
-+/*
-+ * dx_root_info is laid out so that if it should somehow get overlaid by a
-+ * dirent the two low bits of the hash version will be zero.  Therefore, the
-+ * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
-+ */
-+
-+struct dx_root
-+{
-+      struct fake_dirent dot;
-+      char dot_name[4];
-+      struct fake_dirent dotdot;
-+      char dotdot_name[4];
-+      struct dx_root_info
-+      {
-+              le_u32 reserved_zero;
-+              u8 hash_version; /* 0 now, 1 at release */
-+              u8 info_length; /* 8 */
-+              u8 indirect_levels;
-+              u8 unused_flags;
-+      }
-+      info;
-+      struct dx_entry entries[0];
-+};
-+
-+struct dx_node
-+{
-+      struct fake_dirent fake;
-+      struct dx_entry entries[0];
-+};
-+
-+
-+struct dx_frame
-+{
-+      struct buffer_head *bh;
-+      struct dx_entry *entries;
-+      struct dx_entry *at;
-+};
-+
-+struct dx_map_entry
-+{
-+      u32 hash;
-+      u32 offs;
-+};
-+
-+typedef struct ext3_dir_entry_2 ext3_dirent;
-+static inline unsigned dx_get_block (struct dx_entry *entry);
-+static void dx_set_block (struct dx_entry *entry, unsigned value);
-+static inline unsigned dx_get_hash (struct dx_entry *entry);
-+static void dx_set_hash (struct dx_entry *entry, unsigned value);
-+static unsigned dx_get_count (struct dx_entry *entries);
-+static unsigned dx_get_limit (struct dx_entry *entries);
-+static void dx_set_count (struct dx_entry *entries, unsigned value);
-+static void dx_set_limit (struct dx_entry *entries, unsigned value);
-+static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
-+static unsigned dx_node_limit (struct inode *dir);
-+static unsigned dx_hack_hash (const u8 *name, int len);
-+static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame);
-+static void dx_release (struct dx_frame *frames);
-+static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]);
-+static void dx_sort_map(struct dx_map_entry *map, unsigned count);
-+static ext3_dirent *dx_copy_dirents (char *from, char *to,
-+     struct dx_map_entry *map, int count);
-+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
-+
-+
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * Future: use high four bits of block for coalesce-on-delete flags
-+ * Mask them off for now.
-+ */
-+
-+static inline unsigned dx_get_block (struct dx_entry *entry)
-+{
-+      return le32_to_cpu(entry->block.v) & 0x00ffffff;
-+}
-+
-+static inline void dx_set_block (struct dx_entry *entry, unsigned value)
-+{
-+      entry->block.v = cpu_to_le32(value);
-+}
-+
-+static inline unsigned dx_get_hash (struct dx_entry *entry)
-+{
-+      return le32_to_cpu(entry->hash.v);
-+}
-+
-+static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
-+{
-+      entry->hash.v = cpu_to_le32(value);
-+}
-+
-+static inline unsigned dx_get_count (struct dx_entry *entries)
-+{
-+      return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
-+}
-+
-+static inline unsigned dx_get_limit (struct dx_entry *entries)
-+{
-+      return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
-+}
-+
-+static inline void dx_set_count (struct dx_entry *entries, unsigned value)
-+{
-+      ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
-+}
-+
-+static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
-+{
-+      ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
-+}
-+
-+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
-+{
-+      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
-+              EXT3_DIR_REC_LEN(2) - infosize;
-+      return 0? 20: entry_space / sizeof(struct dx_entry);
-+}
-+
-+static inline unsigned dx_node_limit (struct inode *dir)
-+{
-+      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
-+      return 0? 22: entry_space / sizeof(struct dx_entry);
-+}
-+
-+/* Hash function - not bad, but still looking for an ideal default */
-+
-+static unsigned dx_hack_hash (const u8 *name, int len)
-+{
-+      u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
-+      while (len--)
-+      {
-+              u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
-+              if (hash & 0x80000000) hash -= 0x7fffffff;
-+              hash1 = hash0;
-+              hash0 = hash;
-+      }
-+      return hash0;
-+}
-+
-+#define dx_hash(s,n) (dx_hack_hash(s,n) << 1)
-+
-+/*
-+ * Debug
-+ */
-+#ifdef DX_DEBUG
-+#define dxtrace dxtrace_on
-+static void dx_show_index (char * label, struct dx_entry *entries)
-+{
-+      int i, n = dx_get_count (entries);
-+      printk("%s index ", label);
-+      for (i = 0; i < n; i++)
-+      {
-+              printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
-+      }
-+      printk("\n");
-+}
-+
-+struct stats
-+{ 
-+      unsigned names;
-+      unsigned space;
-+      unsigned bcount;
-+};
-+
-+static struct stats dx_show_leaf (ext3_dirent *de, int size, int show_names)
-+{
-+      unsigned names = 0, space = 0;
-+      char *base = (char *) de;
-+      printk("names: ");
-+      while ((char *) de < base + size)
-+      {
-+              if (de->inode)
-+              {
-+                      if (show_names)
-+                      {
-+                              int len = de->name_len;
-+                              char *name = de->name;
-+                              while (len--) printk("%c", *name++);
-+                              printk(":%x.%u ", dx_hash (de->name, de->name_len), ((char *) de - base));
-+                      }
-+                      space += EXT3_DIR_REC_LEN(de->name_len);
-+                      names++;
-+              }
-+              de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
-+      }
-+      printk("(%i)\n", names);
-+      return (struct stats) { names, space, 1 };
-+}
-+
-+struct stats dx_show_entries (struct inode *dir, struct dx_entry *entries, int levels)
-+{
-+      unsigned blocksize = dir->i_sb->s_blocksize;
-+      unsigned count = dx_get_count (entries), names = 0, space = 0, i;
-+      unsigned bcount = 0;
-+      struct buffer_head *bh;
-+      int err;
-+      printk("%i indexed blocks...\n", count);
-+      for (i = 0; i < count; i++, entries++)
-+      {
-+              u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
-+              u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
-+              struct stats stats;
-+              printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
-+              if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
-+              stats = levels?
-+                 dx_show_entries (dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
-+                 dx_show_leaf ((ext3_dirent *) bh->b_data, blocksize, 0);
-+              names += stats.names;
-+              space += stats.space;
-+              bcount += stats.bcount;
-+              brelse (bh);
-+      }
-+      if (bcount)
-+              printk("%snames %u, fullness %u (%u%%)\n", levels?"":"   ",
-+                      names, space/bcount,(space/bcount)*100/blocksize);
-+      return (struct stats) { names, space, bcount};
-+}
-+#else
-+#define dxtrace dxtrace_off
-+#endif
-+
-+/*
-+ * Probe for a directory leaf block to search
-+ */
-+
-+static struct dx_frame *
-+dx_probe(struct inode *dir, u32 hash, struct dx_frame *frame_in)
-+{
-+      unsigned count, indirect;
-+      struct dx_entry *at, *entries, *p, *q, *m;
-+      struct dx_root *root;
-+      struct buffer_head *bh;
-+      struct dx_frame *frame = frame_in;
-+      int err;
-+
-+      frame->bh = NULL;
-+      if (!(bh = ext3_bread(NULL, dir, 0, 0, &err)))
-+              goto fail;
-+      root = (struct dx_root *) bh->b_data;
-+      if (root->info.hash_version > 0 || root->info.unused_flags & 1) {
-+              brelse(bh);
-+              goto fail;
-+      }
-+      if ((indirect = root->info.indirect_levels) > 1) {
-+              brelse(bh);
-+              goto fail;
-+      }
-+      entries = (struct dx_entry *) (((char *) &root->info) + root->info.info_length);
-+      assert (dx_get_limit(entries) == dx_root_limit(dir, root->info.info_length));
-+      dxtrace (printk("Look up %x", hash));
-+      while (1)
-+      {
-+              count = dx_get_count(entries);
-+              assert (count && count <= dx_get_limit(entries));
-+              p = entries + 1;
-+              q = entries + count - 1;
-+              while (p <= q)
-+              {
-+                      m = p + (q - p)/2;
-+                      dxtrace(printk("."));
-+                      if (dx_get_hash(m) > hash)
-+                              q = m - 1;
-+                      else
-+                              p = m + 1;
-+              }
-+
-+              if (0) // linear search cross check
-+              {
-+                      unsigned n = count - 1;
-+                      at = entries;
-+                      while (n--)
-+                      {
-+                              dxtrace(printk(","));
-+                              if (dx_get_hash(++at) > hash)
-+                              {
-+                                      at--;
-+                                      break;
-+                              }
-+                      }
-+                      assert (at == p - 1);
-+              }
-+
-+              at = p - 1;
-+              dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
-+              frame->bh = bh;
-+              frame->entries = entries;
-+              frame->at = at;
-+              if (!indirect--) return frame;
-+              if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0,&err)))
-+                      goto fail2;
-+              at = entries = ((struct dx_node *) bh->b_data)->entries;
-+              assert (dx_get_limit(entries) == dx_node_limit (dir));
-+              frame++;
-+      }
-+fail2:
-+      while (frame >= frame_in) {
-+              brelse(frame->bh);
-+              frame--;
-+      }
-+fail:
-+      return NULL;
-+}
-+
-+static void dx_release (struct dx_frame *frames)
-+{
-+      if (frames[0].bh == NULL)
-+              return;
-+
-+      if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
-+              brelse (frames[1].bh);
-+      brelse (frames[0].bh);
-+}
-+
-+/*
-+ * Directory block splitting, compacting
-+ */
-+
-+static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[])
-+{
-+      int count = 0;
-+      char *base = (char *) de;
-+      while ((char *) de < base + size) {
-+              if (de->name_len && de->inode) {
-+                      map[count].hash = dx_hash (de->name, de->name_len);
-+                      map[count].offs = (u32) ((char *) de - base);
-+                      count++;
-+              }
-+              de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
-+      }
-+      return count;
-+}
-+
-+static void dx_sort_map (struct dx_map_entry *map, unsigned count)
-+{
-+        struct dx_map_entry *p, *q, *top = map + count - 1;
-+        int more;
-+        /* Combsort until bubble sort doesn't suck */
-+        while (count > 2)
-+      {
-+                count = count*10/13;
-+                if (count - 9 < 2) /* 9, 10 -> 11 */
-+                        count = 11;
-+                for (p = top, q = p - count; q >= map; p--, q--)
-+                        if (p->hash < q->hash)
-+                                swap(*p, *q);
-+        }
-+        /* Garden variety bubble sort */
-+        do {
-+                more = 0;
-+                q = top;
-+                while (q-- > map)
-+              {
-+                        if (q[1].hash >= q[0].hash)
-+                              continue;
-+                        swap(*(q+1), *q);
-+                        more = 1;
-+              }
-+      } while(more);
-+}
-+
-+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
-+{
-+      struct dx_entry *entries = frame->entries;
-+      struct dx_entry *old = frame->at, *new = old + 1;
-+      int count = dx_get_count(entries);
-+
-+      assert(count < dx_get_limit(entries));
-+      assert(old < entries + count);
-+      memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
-+      dx_set_hash(new, hash);
-+      dx_set_block(new, block);
-+      dx_set_count(entries, count + 1);
-+}
-+#endif
-+
-+static void ext3_update_dx_flag(struct inode *inode)
-+{
-+      if (!test_opt(inode->i_sb, INDEX))
-+              EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
-+}
-+
- /*
-  * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
-  *
-@@ -95,6 +529,15 @@
- }
- /*
-+ * p is at least 6 bytes before the end of page
-+ */
-+static inline ext3_dirent *ext3_next_entry(ext3_dirent *p)
-+{
-+      return (ext3_dirent *)((char*)p + le16_to_cpu(p->rec_len));
-+}
-+
-+
-+/*
-  *    ext3_find_entry()
-  *
-  * finds an entry in the specified directory with the wanted name. It
-@@ -105,6 +548,8 @@
-  * The returned buffer_head has ->b_count elevated.  The caller is expected
-  * to brelse() it when appropriate.
-  */
-+
-+      
- static struct buffer_head * ext3_find_entry (struct dentry *dentry,
-                                       struct ext3_dir_entry_2 ** res_dir)
- {
-@@ -119,10 +564,70 @@
-       int num = 0;
-       int nblocks, i, err;
-       struct inode *dir = dentry->d_parent->d_inode;
-+      ext3_dirent *de, *top;
-       *res_dir = NULL;
-       sb = dir->i_sb;
-+      if (dentry->d_name.len > EXT3_NAME_LEN)
-+              return NULL;
-+      if (ext3_dx && is_dx(dir)) {
-+              u32 hash = dx_hash(dentry->d_name.name, dentry->d_name.len);
-+              struct dx_frame frames[2], *frame;
-+              if (!(frame = dx_probe (dir, hash, frames)))
-+                      return NULL;
-+dxnext:
-+              block = dx_get_block(frame->at);
-+              if (!(bh = ext3_bread (NULL,dir, block, 0, &err)))
-+                      goto dxfail;
-+              de = (ext3_dirent *) bh->b_data;
-+              top = (ext3_dirent *) ((char *) de + sb->s_blocksize -
-+                              EXT3_DIR_REC_LEN(0));
-+              for (; de < top; de = ext3_next_entry(de))
-+                      if (ext3_match(dentry->d_name.len, dentry->d_name.name, de)) {
-+                              if (!ext3_check_dir_entry("ext3_find_entry",
-+                                        dir, de, bh,
-+                                        (block<<EXT3_BLOCK_SIZE_BITS(sb))
-+                                         +((char *)de - bh->b_data))) {
-+                                      brelse (bh);
-+                                      goto dxfail;
-+                              }
-+                              *res_dir = de;
-+                              goto dxfound;
-+                      }
-+              brelse (bh);
-+              /* Same hash continues in next block?  Search on. */
-+              if (++(frame->at) == frame->entries + dx_get_count(frame->entries))
-+              {
-+                      struct buffer_head *bh2;
-+                      if (frame == frames)
-+                              goto dxfail;
-+                      if (++(frames->at) == frames->entries + dx_get_count(frames->entries))
-+                              goto dxfail;
-+                      /* should omit read if not continued */
-+                      if (!(bh2 = ext3_bread (NULL, dir,
-+                                              dx_get_block(frames->at),
-+                                              0, &err)))
-+                              goto dxfail;
-+                      brelse (frame->bh);
-+                      frame->bh = bh2;
-+                      frame->at = frame->entries = ((struct dx_node *) bh2->b_data)->entries;
-+                      /* Subtle: the 0th entry has the count, find the hash in frame above */
-+                      if ((dx_get_hash(frames->at) & -2) == hash)
-+                              goto dxnext;
-+                      goto dxfail;
-+              }
-+              if ((dx_get_hash(frame->at) & -2) == hash)
-+                      goto dxnext;
-+dxfail:
-+              dxtrace(printk("%s not found\n", name));
-+              dx_release (frames);
-+              return NULL;
-+dxfound:
-+              dx_release (frames);
-+              return bh;
-+      }
-+      
-       nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
-       start = dir->u.ext3_i.i_dir_start_lookup;
-       if (start >= nblocks)
-@@ -237,6 +748,90 @@
-               de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
- }
-+static ext3_dirent *
-+dx_copy_dirents (char *from, char *to, struct dx_map_entry *map, int count)
-+{
-+      unsigned rec_len = 0;
-+
-+      while (count--) {
-+              ext3_dirent *de = (ext3_dirent *) (from + map->offs);
-+              rec_len = EXT3_DIR_REC_LEN(de->name_len);
-+              memcpy (to, de, rec_len);
-+              ((ext3_dirent *) to)->rec_len = rec_len;
-+              to += rec_len;
-+              map++;
-+      }
-+      return (ext3_dirent *) (to - rec_len);
-+}
-+
-+#ifdef CONFIG_EXT3_INDEX
-+static ext3_dirent *do_split(handle_t *handle, struct inode *dir,
-+                      struct buffer_head **bh,struct dx_frame *frame,
-+                      u32 hash, int *error)
-+{
-+      unsigned count;
-+      struct buffer_head *bh2;
-+      u32 newblock;
-+      u32 hash2;
-+      struct dx_map_entry *map;
-+      char *data1 = (*bh)->b_data, *data2, *data3;
-+      unsigned split;
-+      ext3_dirent *de, *de2;
-+
-+      bh2 = ext3_append (handle, dir, &newblock, error);
-+      if (!(bh2))
-+      {
-+              brelse(*bh);
-+              *bh = NULL;
-+              return (ext3_dirent *)bh2;
-+      }
-+
-+      BUFFER_TRACE(*bh, "get_write_access");
-+      ext3_journal_get_write_access(handle, *bh);
-+      BUFFER_TRACE(frame->bh, "get_write_access");
-+      ext3_journal_get_write_access(handle, frame->bh);
-+
-+      data2 = bh2->b_data;
-+
-+      map = kmalloc(sizeof(*map) * PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1,
-+                    GFP_KERNEL);
-+      if (!map)
-+              panic("no memory for do_split\n");
-+      count = dx_make_map((ext3_dirent *)data1, dir->i_sb->s_blocksize, map);
-+      split = count/2; // need to adjust to actual middle
-+      dx_sort_map (map, count);
-+      hash2 = map[split].hash;
-+      dxtrace(printk("Split block %i at %x, %i/%i\n",
-+              dx_get_block(frame->at), hash2, split, count-split));
-+
-+      /* Fancy dance to stay within two buffers */
-+      de2 = dx_copy_dirents (data1, data2, map + split, count - split);
-+      data3 = (char *) de2 + de2->rec_len;
-+      de = dx_copy_dirents (data1, data3, map, split);
-+      memcpy(data1, data3, (char *) de + de->rec_len - data3);
-+      de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
-+      de->rec_len = cpu_to_le16(data1 + dir->i_sb->s_blocksize - (char *)de);
-+      de2->rec_len = cpu_to_le16(data2 + dir->i_sb->s_blocksize-(char *)de2);
-+      dxtrace(dx_show_leaf((ext3_dirent *)data1, dir->i_sb->s_blocksize, 1));
-+      dxtrace(dx_show_leaf((ext3_dirent *)data2, dir->i_sb->s_blocksize, 1));
-+
-+      /* Which block gets the new entry? */
-+      if (hash >= hash2)
-+      {
-+              swap(*bh, bh2);
-+              de = de2;
-+      }
-+      dx_insert_block(frame, hash2 + (hash2 == map[split-1].hash), newblock);
-+      ext3_journal_dirty_metadata (handle, bh2);
-+      brelse (bh2);
-+      ext3_journal_dirty_metadata (handle, frame->bh);
-+      dxtrace(dx_show_index ("frame", frame->entries));
-+      kfree(map);
-+      return de;
-+}
-+#endif
-+
-+
- /*
-  *    ext3_add_entry()
-  *
-@@ -255,118 +849,278 @@
-       struct inode *inode)
- {
-       struct inode *dir = dentry->d_parent->d_inode;
--      const char *name = dentry->d_name.name;
--      int namelen = dentry->d_name.len;
-       unsigned long offset;
--      unsigned short rec_len;
-       struct buffer_head * bh;
--      struct ext3_dir_entry_2 * de, * de1;
--      struct super_block * sb;
-+      ext3_dirent *de;
-+      struct super_block * sb = dir->i_sb;
-       int     retval;
-+      unsigned short reclen = EXT3_DIR_REC_LEN(dentry->d_name.len);
--      sb = dir->i_sb;
-+      unsigned nlen, rlen;
-+      u32 block, blocks;
-+      char *top;
--      if (!namelen)
-+      if (!dentry->d_name.len)
-               return -EINVAL;
--      bh = ext3_bread (handle, dir, 0, 0, &retval);
--      if (!bh)
--              return retval;
--      rec_len = EXT3_DIR_REC_LEN(namelen);
--      offset = 0;
--      de = (struct ext3_dir_entry_2 *) bh->b_data;
--      while (1) {
--              if ((char *)de >= sb->s_blocksize + bh->b_data) {
--                      brelse (bh);
--                      bh = NULL;
--                      bh = ext3_bread (handle, dir,
--                              offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
--                      if (!bh)
--                              return retval;
--                      if (dir->i_size <= offset) {
--                              if (dir->i_size == 0) {
--                                      brelse(bh);
--                                      return -ENOENT;
-+      if (ext3_dx && is_dx(dir)) {
-+              struct dx_frame frames[2], *frame;
-+              struct dx_entry *entries, *at;
-+              u32 hash;
-+              char *data1;
-+
-+              hash = dx_hash(dentry->d_name.name, dentry->d_name.len);
-+              /* FIXME: do something if dx_probe() fails here */
-+              frame = dx_probe(dir, hash, frames);
-+              entries = frame->entries;
-+              at = frame->at;
-+
-+              if (!(bh = ext3_bread(handle,dir, dx_get_block(at), 0,&retval)))
-+                      goto dxfail1;
-+
-+              BUFFER_TRACE(bh, "get_write_access");
-+              ext3_journal_get_write_access(handle, bh);
-+
-+              data1 = bh->b_data;
-+              de = (ext3_dirent *) data1;
-+              top = data1 + (0? 200: sb->s_blocksize);
-+              while ((char *) de < top)
-+              {
-+                      /* FIXME: check EEXIST and dir */
-+                      nlen = EXT3_DIR_REC_LEN(de->name_len);
-+                      rlen = le16_to_cpu(de->rec_len);
-+                      if ((de->inode? rlen - nlen: rlen) >= reclen)
-+                              goto dx_add;
-+                      de = (ext3_dirent *) ((char *) de + rlen);
-+              }
-+              /* Block full, should compress but for now just split */
-+              dxtrace(printk("using %u of %u node entries\n",
-+                      dx_get_count(entries), dx_get_limit(entries)));
-+              /* Need to split index? */
-+              if (dx_get_count(entries) == dx_get_limit(entries))
-+              {
-+                      u32 newblock;
-+                      unsigned icount = dx_get_count(entries);
-+                      int levels = frame - frames;
-+                      struct dx_entry *entries2;
-+                      struct dx_node *node2;
-+                      struct buffer_head *bh2;
-+                      if (levels && dx_get_count(frames->entries) == dx_get_limit(frames->entries))
-+                              goto dxfull;
-+                      bh2 = ext3_append (handle, dir, &newblock, &retval);
-+                      if (!(bh2))
-+                              goto dxfail2;
-+                      node2 = (struct dx_node *)(bh2->b_data);
-+                      entries2 = node2->entries;
-+                      node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
-+                      node2->fake.inode = 0;
-+                      BUFFER_TRACE(frame->bh, "get_write_access");
-+                      ext3_journal_get_write_access(handle, frame->bh);
-+                      if (levels)
-+                      {
-+                              unsigned icount1 = icount/2, icount2 = icount - icount1;
-+                              unsigned hash2 = dx_get_hash(entries + icount1);
-+                              dxtrace(printk("Split index %i/%i\n", icount1, icount2));
-+                              
-+                              BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
-+                              ext3_journal_get_write_access(handle, frames[0].bh);
-+                              
-+                              memcpy ((char *) entries2, (char *) (entries + icount1),
-+                                      icount2 * sizeof(struct dx_entry));
-+                              dx_set_count (entries, icount1);
-+                              dx_set_count (entries2, icount2);
-+                              dx_set_limit (entries2, dx_node_limit(dir));
-+
-+                              /* Which index block gets the new entry? */
-+                              if (at - entries >= icount1) {
-+                                      frame->at = at = at - entries - icount1 + entries2;
-+                                      frame->entries = entries = entries2;
-+                                      swap(frame->bh, bh2);
-                               }
--
--                              ext3_debug ("creating next block\n");
--
--                              BUFFER_TRACE(bh, "get_write_access");
--                              ext3_journal_get_write_access(handle, bh);
--                              de = (struct ext3_dir_entry_2 *) bh->b_data;
--                              de->inode = 0;
--                              de->rec_len = le16_to_cpu(sb->s_blocksize);
--                              dir->u.ext3_i.i_disksize =
--                                      dir->i_size = offset + sb->s_blocksize;
--                              dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
--                              ext3_mark_inode_dirty(handle, dir);
-+                              dx_insert_block (frames + 0, hash2, newblock);
-+                              dxtrace(dx_show_index ("node", frames[1].entries));
-+                              dxtrace(dx_show_index ("node",
-+                                      ((struct dx_node *) bh2->b_data)->entries));
-+                              ext3_journal_dirty_metadata(handle, bh2);
-+                              brelse (bh2);
-                       } else {
--
--                              ext3_debug ("skipping to next block\n");
--
--                              de = (struct ext3_dir_entry_2 *) bh->b_data;
-+                              dxtrace(printk("Creating second level index...\n"));
-+                              memcpy((char *) entries2, (char *) entries,
-+                                      icount * sizeof(struct dx_entry));
-+                              dx_set_limit(entries2, dx_node_limit(dir));
-+
-+                              /* Set up root */
-+                              dx_set_count(entries, 1);
-+                              dx_set_block(entries + 0, newblock);
-+                              ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
-+
-+                              /* Add new access path frame */
-+                              frame = frames + 1;
-+                              frame->at = at = at - entries + entries2;
-+                              frame->entries = entries = entries2;
-+                              frame->bh = bh2;
-+                              ext3_journal_get_write_access(handle, frame->bh);
-                       }
-+                      ext3_journal_dirty_metadata(handle, frames[0].bh);
-               }
--              if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
--                                         offset)) {
--                      brelse (bh);
--                      return -ENOENT;
--              }
--              if (ext3_match (namelen, name, de)) {
-+              de = do_split(handle, dir, &bh, frame, hash, &retval);
-+              dx_release (frames);
-+              if (!(de))
-+                      goto fail;
-+              nlen = EXT3_DIR_REC_LEN(de->name_len);
-+              rlen = le16_to_cpu(de->rec_len);
-+              goto add;
-+
-+dx_add:
-+              dx_release (frames);
-+              goto add;
-+
-+dxfull:
-+              ext3_warning(sb, __FUNCTION__, "Directory index full!\n");
-+              retval = -ENOSPC;
-+dxfail2:
-+              brelse(bh);
-+dxfail1:
-+              dx_release (frames);
-+              goto fail1;
-+      }
-+
-+      blocks = dir->i_size >> sb->s_blocksize_bits;
-+      for (block = 0, offset = 0; block < blocks; block++) {
-+              bh = ext3_bread(handle, dir, block, 0, &retval);
-+              if(!bh)
-+                      return retval;
-+              de = (ext3_dirent *)bh->b_data;
-+              top = bh->b_data + sb->s_blocksize - reclen;
-+              while ((char *) de <= top) {
-+                      if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
-+                                                bh, offset)) {
-+                              brelse (bh);
-+                              return -EIO;
-+                      }
-+                      if (ext3_match(dentry->d_name.len,dentry->d_name.name,de)) {
-                               brelse (bh);
-                               return -EEXIST;
--              }
--              if ((le32_to_cpu(de->inode) == 0 &&
--                              le16_to_cpu(de->rec_len) >= rec_len) ||
--                  (le16_to_cpu(de->rec_len) >=
--                              EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
--                      BUFFER_TRACE(bh, "get_write_access");
--                      ext3_journal_get_write_access(handle, bh);
--                      /* By now the buffer is marked for journaling */
--                      offset += le16_to_cpu(de->rec_len);
--                      if (le32_to_cpu(de->inode)) {
--                              de1 = (struct ext3_dir_entry_2 *) ((char *) de +
--                                      EXT3_DIR_REC_LEN(de->name_len));
--                              de1->rec_len =
--                                      cpu_to_le16(le16_to_cpu(de->rec_len) -
--                                      EXT3_DIR_REC_LEN(de->name_len));
--                              de->rec_len = cpu_to_le16(
--                                              EXT3_DIR_REC_LEN(de->name_len));
--                              de = de1;
-                       }
--                      de->file_type = EXT3_FT_UNKNOWN;
--                      if (inode) {
--                              de->inode = cpu_to_le32(inode->i_ino);
--                              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
--                      } else
--                              de->inode = 0;
--                      de->name_len = namelen;
--                      memcpy (de->name, name, namelen);
--                      /*
--                       * XXX shouldn't update any times until successful
--                       * completion of syscall, but too many callers depend
--                       * on this.
--                       *
--                       * XXX similarly, too many callers depend on
--                       * ext3_new_inode() setting the times, but error
--                       * recovery deletes the inode, so the worst that can
--                       * happen is that the times are slightly out of date
--                       * and/or different from the directory change time.
--                       */
--                      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
--                      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
--                      dir->i_version = ++event;
--                      ext3_mark_inode_dirty(handle, dir);
--                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
--                      ext3_journal_dirty_metadata(handle, bh);
-+                      nlen = EXT3_DIR_REC_LEN(de->name_len);
-+                      rlen = le16_to_cpu(de->rec_len);
-+                      if ((de->inode ? rlen - nlen: rlen) >= reclen)
-+                              goto add;
-+                      de = (ext3_dirent *)((char *)de + rlen);
-+                      offset += rlen;
-+              }
-+              if (ext3_dx && blocks == 1 && test_opt(sb, INDEX))
-+                      goto dx_make_index;
-+              brelse(bh);
-+      }
-+      bh = ext3_append(handle, dir, &block, &retval);
-+      if (!bh)
-+              return retval;
-+      de = (ext3_dirent *) bh->b_data;
-+      de->inode = 0;
-+      de->rec_len = cpu_to_le16(rlen = sb->s_blocksize);
-+      nlen = 0;
-+      goto add;
-+
-+add:
-+      BUFFER_TRACE(bh, "get_write_access");
-+      ext3_journal_get_write_access(handle, bh);
-+      /* By now the buffer is marked for journaling */
-+      if (de->inode) {
-+              ext3_dirent *de1 = (ext3_dirent *)((char *)de + nlen);
-+              de1->rec_len = cpu_to_le16(rlen - nlen);
-+              de->rec_len = cpu_to_le16(nlen);
-+              de = de1;
-+      }
-+      de->file_type = EXT3_FT_UNKNOWN;
-+      if (inode) {
-+              de->inode = cpu_to_le32(inode->i_ino);
-+              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
-+      } else
-+              de->inode = 0;
-+      de->name_len = dentry->d_name.len;
-+      memcpy (de->name, dentry->d_name.name, dentry->d_name.len);
-+      /*
-+       * XXX shouldn't update any times until successful
-+       * completion of syscall, but too many callers depend
-+       * on this.
-+       *
-+       * XXX similarly, too many callers depend on
-+       * ext3_new_inode() setting the times, but error
-+       * recovery deletes the inode, so the worst that can
-+       * happen is that the times are slightly out of date
-+       * and/or different from the directory change time.
-+       */
-+      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-+      ext3_update_dx_flag(dir);
-+      dir->i_version = ++event;
-+      ext3_mark_inode_dirty(handle, dir);
-+      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+      ext3_journal_dirty_metadata(handle, bh);
-+      brelse(bh);
-+      return 0;
-+
-+dx_make_index:
-+      {
-+              struct buffer_head *bh2;
-+              struct dx_root *root;
-+              struct dx_frame frames[2], *frame;
-+              struct dx_entry *entries;
-+              ext3_dirent *de2;
-+              char *data1;
-+              unsigned len;
-+              u32 hash;
-+              
-+              dxtrace(printk("Creating index\n"));
-+              ext3_journal_get_write_access(handle, bh);
-+              root = (struct dx_root *) bh->b_data;
-+              
-+              EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
-+              bh2 = ext3_append (handle, dir, &block, &retval);
-+              if (!(bh2))
-+              {
-                       brelse(bh);
--                      return 0;
-+                      return retval;
-               }
--              offset += le16_to_cpu(de->rec_len);
--              de = (struct ext3_dir_entry_2 *)
--                      ((char *) de + le16_to_cpu(de->rec_len));
-+              data1 = bh2->b_data;
-+
-+              /* The 0th block becomes the root, move the dirents out */
-+              de = (ext3_dirent *) &root->info;
-+              len = ((char *) root) + sb->s_blocksize - (char *) de;
-+              memcpy (data1, de, len);
-+              de = (ext3_dirent *) data1;
-+              top = data1 + len;
-+              while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top)
-+                      de = de2;
-+              de->rec_len = cpu_to_le16(data1 + sb->s_blocksize - (char *)de);
-+              /* Initialize the root; the dot dirents already exist */
-+              de = (ext3_dirent *) (&root->dotdot);
-+              de->rec_len = cpu_to_le16(sb->s_blocksize-EXT3_DIR_REC_LEN(2));
-+              memset (&root->info, 0, sizeof(root->info));
-+              root->info.info_length = sizeof(root->info);
-+              entries = root->entries;
-+              dx_set_block (entries, 1);
-+              dx_set_count (entries, 1);
-+              dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
-+
-+              /* Initialize as for dx_probe */
-+              hash = dx_hash (dentry->d_name.name, dentry->d_name.len);
-+              frame = frames;
-+              frame->entries = entries;
-+              frame->at = entries;
-+              frame->bh = bh;
-+              bh = bh2;
-+              de = do_split(handle,dir, &bh, frame, hash, &retval);
-+              dx_release (frames);
-+              if (!(de))
-+                      return retval;
-+              nlen = EXT3_DIR_REC_LEN(de->name_len);
-+              rlen = le16_to_cpu(de->rec_len);
-+              goto add;
-       }
--      brelse (bh);
--      return -ENOSPC;
-+fail1:
-+      return retval;
-+fail:
-+      return -ENOENT;
- }
- /*
-@@ -451,7 +1212,8 @@
-       struct inode * inode;
-       int err;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-@@ -478,7 +1240,8 @@
-       struct inode *inode;
-       int err;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-@@ -507,7 +1270,8 @@
-       if (dir->i_nlink >= EXT3_LINK_MAX)
-               return -EMLINK;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-@@ -550,7 +1320,7 @@
-       if (err)
-               goto out_no_entry;
-       dir->i_nlink++;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
-       d_instantiate(dentry, inode);
- out_stop:
-@@ -832,7 +1596,7 @@
-       ext3_mark_inode_dirty(handle, inode);
-       dir->i_nlink--;
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
- end_rmdir:
-@@ -878,7 +1642,7 @@
-       if (retval)
-               goto end_unlink;
-       dir->i_ctime = dir->i_mtime = CURRENT_TIME;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
-       inode->i_nlink--;
-       if (!inode->i_nlink)
-@@ -904,7 +1668,8 @@
-       if (l > dir->i_sb->s_blocksize)
-               return -ENAMETOOLONG;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-@@ -959,7 +1724,8 @@
-       if (inode->i_nlink >= EXT3_LINK_MAX)
-               return -EMLINK;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS);
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-@@ -995,7 +1761,8 @@
-       old_bh = new_bh = dir_bh = NULL;
--      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
-+      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-@@ -1077,7 +1844,7 @@
-               new_inode->i_ctime = CURRENT_TIME;
-       }
-       old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
--      old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(old_dir);
-       if (dir_bh) {
-               BUFFER_TRACE(dir_bh, "get_write_access");
-               ext3_journal_get_write_access(handle, dir_bh);
-@@ -1089,7 +1856,7 @@
-                       new_inode->i_nlink--;
-               } else {
-                       new_dir->i_nlink++;
--                      new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+                      ext3_update_dx_flag(new_dir);
-                       ext3_mark_inode_dirty(handle, new_dir);
-               }
-       }
---- ./include/linux/ext3_fs.h  2002/03/05 06:18:59     2.1
-+++ ./include/linux/ext3_fs.h  2002/03/05 06:26:56
-@@ -339,6 +339,7 @@
-   #define EXT3_MOUNT_WRITEBACK_DATA   0x0C00  /* No data ordering */
- #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
- #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
-+#define EXT3_MOUNT_INDEX              0x4000  /* Enable directory index */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
-@@ -575,6 +576,24 @@
- #define EXT3_DIR_ROUND                        (EXT3_DIR_PAD - 1)
- #define EXT3_DIR_REC_LEN(name_len)    (((name_len) + 8 + EXT3_DIR_ROUND) & \
-                                        ~EXT3_DIR_ROUND)
-+/*
-+ * Hash Tree Directory indexing
-+ * (c) Daniel Phillips, 2001
-+ */
-+
-+#define CONFIG_EXT3_INDEX
-+
-+#ifdef CONFIG_EXT3_INDEX
-+  enum {ext3_dx = 1};
-+  #define is_dx(dir) (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)
-+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#else
-+  enum {ext3_dx = 0};
-+  #define is_dx(dir) 0
-+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
-+#endif
- #ifdef __KERNEL__
- /*
---- ./include/linux/ext3_jbd.h 2002/03/05 06:18:59     2.1
-+++ ./include/linux/ext3_jbd.h 2002/03/05 06:33:54
-@@ -63,6 +63,8 @@
- #define EXT3_RESERVE_TRANS_BLOCKS     12
-+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
-+
- int
- ext3_mark_iloc_dirty(handle_t *handle, 
-                    struct inode *inode,