1 --- ./fs/ext3/super.c 2002/03/05 06:18:59 2.1
2 +++ ./fs/ext3/super.c 2002/03/05 06:26:56
4 "EXT3 Check option not supported\n");
7 + else if (!strcmp (this_char, "index"))
8 +#ifdef CONFIG_EXT3_INDEX
9 + set_opt (*mount_options, INDEX);
11 + printk("EXT3 index option not supported\n");
13 else if (!strcmp (this_char, "debug"))
14 set_opt (*mount_options, DEBUG);
15 else if (!strcmp (this_char, "errors")) {
17 es->s_mtime = cpu_to_le32(CURRENT_TIME);
18 ext3_update_dynamic_rev(sb);
19 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
21 + if (test_opt(sb, INDEX))
22 + EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX);
23 + else if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
24 + set_opt (EXT3_SB(sb)->s_mount_opt, INDEX);
26 ext3_commit_super (sb, es, 1);
27 if (test_opt (sb, DEBUG))
29 --- ./fs/ext3/namei.c 2002/03/05 06:18:59 2.1
30 +++ ./fs/ext3/namei.c 2002/03/06 00:13:18
32 * David S. Miller (davem@caip.rutgers.edu), 1995
33 * Directory entry file type support and forward compatibility hooks
34 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
35 + * Hash Tree Directory indexing (c)
36 + * Daniel Phillips, 2001
37 + * Hash Tree Directory indexing porting
38 + * Christopher Li, 2002
43 #include <linux/string.h>
44 #include <linux/locks.h>
45 #include <linux/quotaops.h>
47 +#include <linux/slab.h>
50 * define how far ahead to read directories while searching them.
52 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
53 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
55 +static struct buffer_head *ext3_append(handle_t *handle,
56 + struct inode *inode,
57 + u32 *block, int *err)
59 + struct buffer_head *bh;
61 + *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
63 + if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
64 + inode->i_size += inode->i_sb->s_blocksize;
65 + EXT3_I(inode)->i_disksize = inode->i_size;
66 + ext3_journal_get_write_access(handle,bh);
72 +#define assert(test) J_ASSERT(test)
76 +#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
79 +typedef struct { u32 v; } le_u32;
80 +typedef struct { u16 v; } le_u16;
82 +#define dxtrace_on(command) command
83 +#define dxtrace_off(command)
106 + * dx_root_info is laid out so that if it should somehow get overlaid by a
107 + * dirent the two low bits of the hash version will be zero. Therefore, the
108 + * hash version mod 4 should never be 0. Sincerely, the paranoia department.
113 + struct fake_dirent dot;
115 + struct fake_dirent dotdot;
116 + char dotdot_name[4];
117 + struct dx_root_info
119 + le_u32 reserved_zero;
120 + u8 hash_version; /* 0 now, 1 at release */
121 + u8 info_length; /* 8 */
122 + u8 indirect_levels;
126 + struct dx_entry entries[0];
131 + struct fake_dirent fake;
132 + struct dx_entry entries[0];
138 + struct buffer_head *bh;
139 + struct dx_entry *entries;
140 + struct dx_entry *at;
149 +typedef struct ext3_dir_entry_2 ext3_dirent;
150 +static inline unsigned dx_get_block (struct dx_entry *entry);
151 +static void dx_set_block (struct dx_entry *entry, unsigned value);
152 +static inline unsigned dx_get_hash (struct dx_entry *entry);
153 +static void dx_set_hash (struct dx_entry *entry, unsigned value);
154 +static unsigned dx_get_count (struct dx_entry *entries);
155 +static unsigned dx_get_limit (struct dx_entry *entries);
156 +static void dx_set_count (struct dx_entry *entries, unsigned value);
157 +static void dx_set_limit (struct dx_entry *entries, unsigned value);
158 +static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
159 +static unsigned dx_node_limit (struct inode *dir);
160 +static unsigned dx_hack_hash (const u8 *name, int len);
161 +static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame);
162 +static void dx_release (struct dx_frame *frames);
163 +static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]);
164 +static void dx_sort_map(struct dx_map_entry *map, unsigned count);
165 +static ext3_dirent *dx_copy_dirents (char *from, char *to,
166 + struct dx_map_entry *map, int count);
167 +static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
170 +#ifdef CONFIG_EXT3_INDEX
172 + * Future: use high four bits of block for coalesce-on-delete flags
173 + * Mask them off for now.
176 +static inline unsigned dx_get_block (struct dx_entry *entry)
178 + return le32_to_cpu(entry->block.v) & 0x00ffffff;
181 +static inline void dx_set_block (struct dx_entry *entry, unsigned value)
183 + entry->block.v = cpu_to_le32(value);
186 +static inline unsigned dx_get_hash (struct dx_entry *entry)
188 + return le32_to_cpu(entry->hash.v);
191 +static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
193 + entry->hash.v = cpu_to_le32(value);
196 +static inline unsigned dx_get_count (struct dx_entry *entries)
198 + return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
201 +static inline unsigned dx_get_limit (struct dx_entry *entries)
203 + return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
206 +static inline void dx_set_count (struct dx_entry *entries, unsigned value)
208 + ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
211 +static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
213 + ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
216 +static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
218 + unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
219 + EXT3_DIR_REC_LEN(2) - infosize;
220 + return 0? 20: entry_space / sizeof(struct dx_entry);
223 +static inline unsigned dx_node_limit (struct inode *dir)
225 + unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
226 + return 0? 22: entry_space / sizeof(struct dx_entry);
229 +/* Hash function - not bad, but still looking for an ideal default */
231 +static unsigned dx_hack_hash (const u8 *name, int len)
233 + u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
236 + u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
237 + if (hash & 0x80000000) hash -= 0x7fffffff;
244 +#define dx_hash(s,n) (dx_hack_hash(s,n) << 1)
250 +#define dxtrace dxtrace_on
251 +static void dx_show_index (char * label, struct dx_entry *entries)
253 + int i, n = dx_get_count (entries);
254 + printk("%s index ", label);
255 + for (i = 0; i < n; i++)
257 + printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
269 +static struct stats dx_show_leaf (ext3_dirent *de, int size, int show_names)
271 + unsigned names = 0, space = 0;
272 + char *base = (char *) de;
274 + while ((char *) de < base + size)
280 + int len = de->name_len;
281 + char *name = de->name;
282 + while (len--) printk("%c", *name++);
283 + printk(":%x.%u ", dx_hash (de->name, de->name_len), ((char *) de - base));
285 + space += EXT3_DIR_REC_LEN(de->name_len);
288 + de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
290 + printk("(%i)\n", names);
291 + return (struct stats) { names, space, 1 };
294 +struct stats dx_show_entries (struct inode *dir, struct dx_entry *entries, int levels)
296 + unsigned blocksize = dir->i_sb->s_blocksize;
297 + unsigned count = dx_get_count (entries), names = 0, space = 0, i;
298 + unsigned bcount = 0;
299 + struct buffer_head *bh;
301 + printk("%i indexed blocks...\n", count);
302 + for (i = 0; i < count; i++, entries++)
304 + u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
305 + u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
306 + struct stats stats;
307 + printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
308 + if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
310 + dx_show_entries (dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
311 + dx_show_leaf ((ext3_dirent *) bh->b_data, blocksize, 0);
312 + names += stats.names;
313 + space += stats.space;
314 + bcount += stats.bcount;
318 + printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
319 + names, space/bcount,(space/bcount)*100/blocksize);
320 + return (struct stats) { names, space, bcount};
323 +#define dxtrace dxtrace_off
327 + * Probe for a directory leaf block to search
330 +static struct dx_frame *
331 +dx_probe(struct inode *dir, u32 hash, struct dx_frame *frame_in)
333 + unsigned count, indirect;
334 + struct dx_entry *at, *entries, *p, *q, *m;
335 + struct dx_root *root;
336 + struct buffer_head *bh;
337 + struct dx_frame *frame = frame_in;
341 + if (!(bh = ext3_bread(NULL, dir, 0, 0, &err)))
343 + root = (struct dx_root *) bh->b_data;
344 + if (root->info.hash_version > 0 || root->info.unused_flags & 1) {
348 + if ((indirect = root->info.indirect_levels) > 1) {
352 + entries = (struct dx_entry *) (((char *) &root->info) + root->info.info_length);
353 + assert (dx_get_limit(entries) == dx_root_limit(dir, root->info.info_length));
354 + dxtrace (printk("Look up %x", hash));
357 + count = dx_get_count(entries);
358 + assert (count && count <= dx_get_limit(entries));
360 + q = entries + count - 1;
364 + dxtrace(printk("."));
365 + if (dx_get_hash(m) > hash)
371 + if (0) // linear search cross check
373 + unsigned n = count - 1;
377 + dxtrace(printk(","));
378 + if (dx_get_hash(++at) > hash)
384 + assert (at == p - 1);
388 + dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
390 + frame->entries = entries;
392 + if (!indirect--) return frame;
393 + if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0,&err)))
395 + at = entries = ((struct dx_node *) bh->b_data)->entries;
396 + assert (dx_get_limit(entries) == dx_node_limit (dir));
400 + while (frame >= frame_in) {
408 +static void dx_release (struct dx_frame *frames)
410 + if (frames[0].bh == NULL)
413 + if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
414 + brelse (frames[1].bh);
415 + brelse (frames[0].bh);
419 + * Directory block splitting, compacting
422 +static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[])
425 + char *base = (char *) de;
426 + while ((char *) de < base + size) {
427 + if (de->name_len && de->inode) {
428 + map[count].hash = dx_hash (de->name, de->name_len);
429 + map[count].offs = (u32) ((char *) de - base);
432 + de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
437 +static void dx_sort_map (struct dx_map_entry *map, unsigned count)
439 + struct dx_map_entry *p, *q, *top = map + count - 1;
441 + /* Combsort until bubble sort doesn't suck */
444 + count = count*10/13;
445 + if (count - 9 < 2) /* 9, 10 -> 11 */
447 + for (p = top, q = p - count; q >= map; p--, q--)
448 + if (p->hash < q->hash)
451 + /* Garden variety bubble sort */
457 + if (q[1].hash >= q[0].hash)
465 +static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
467 + struct dx_entry *entries = frame->entries;
468 + struct dx_entry *old = frame->at, *new = old + 1;
469 + int count = dx_get_count(entries);
471 + assert(count < dx_get_limit(entries));
472 + assert(old < entries + count);
473 + memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
474 + dx_set_hash(new, hash);
475 + dx_set_block(new, block);
476 + dx_set_count(entries, count + 1);
480 +static void ext3_update_dx_flag(struct inode *inode)
482 + if (!test_opt(inode->i_sb, INDEX))
483 + EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
487 * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
493 + * p is at least 6 bytes before the end of page
495 +static inline ext3_dirent *ext3_next_entry(ext3_dirent *p)
497 + return (ext3_dirent *)((char*)p + le16_to_cpu(p->rec_len));
504 * finds an entry in the specified directory with the wanted name. It
506 * The returned buffer_head has ->b_count elevated. The caller is expected
507 * to brelse() it when appropriate.
511 static struct buffer_head * ext3_find_entry (struct dentry *dentry,
512 struct ext3_dir_entry_2 ** res_dir)
514 @@ -119,10 +564,70 @@
517 struct inode *dir = dentry->d_parent->d_inode;
518 + ext3_dirent *de, *top;
522 + if (dentry->d_name.len > EXT3_NAME_LEN)
524 + if (ext3_dx && is_dx(dir)) {
525 + u32 hash = dx_hash(dentry->d_name.name, dentry->d_name.len);
526 + struct dx_frame frames[2], *frame;
527 + if (!(frame = dx_probe (dir, hash, frames)))
530 + block = dx_get_block(frame->at);
531 + if (!(bh = ext3_bread (NULL,dir, block, 0, &err)))
533 + de = (ext3_dirent *) bh->b_data;
534 + top = (ext3_dirent *) ((char *) de + sb->s_blocksize -
535 + EXT3_DIR_REC_LEN(0));
536 + for (; de < top; de = ext3_next_entry(de))
537 + if (ext3_match(dentry->d_name.len, dentry->d_name.name, de)) {
538 + if (!ext3_check_dir_entry("ext3_find_entry",
540 + (block<<EXT3_BLOCK_SIZE_BITS(sb))
541 + +((char *)de - bh->b_data))) {
549 + /* Same hash continues in next block? Search on. */
550 + if (++(frame->at) == frame->entries + dx_get_count(frame->entries))
552 + struct buffer_head *bh2;
553 + if (frame == frames)
555 + if (++(frames->at) == frames->entries + dx_get_count(frames->entries))
557 + /* should omit read if not continued */
558 + if (!(bh2 = ext3_bread (NULL, dir,
559 + dx_get_block(frames->at),
562 + brelse (frame->bh);
564 + frame->at = frame->entries = ((struct dx_node *) bh2->b_data)->entries;
565 + /* Subtle: the 0th entry has the count, find the hash in frame above */
566 + if ((dx_get_hash(frames->at) & -2) == hash)
570 + if ((dx_get_hash(frame->at) & -2) == hash)
573 + dxtrace(printk("%s not found\n", name));
574 + dx_release (frames);
577 + dx_release (frames);
582 nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
583 start = dir->u.ext3_i.i_dir_start_lookup;
584 if (start >= nblocks)
586 de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
589 +static ext3_dirent *
590 +dx_copy_dirents (char *from, char *to, struct dx_map_entry *map, int count)
592 + unsigned rec_len = 0;
595 + ext3_dirent *de = (ext3_dirent *) (from + map->offs);
596 + rec_len = EXT3_DIR_REC_LEN(de->name_len);
597 + memcpy (to, de, rec_len);
598 + ((ext3_dirent *)to)->rec_len = le16_to_cpu(rec_len);
602 + return (ext3_dirent *) (to - rec_len);
605 +#ifdef CONFIG_EXT3_INDEX
606 +static ext3_dirent *do_split(handle_t *handle, struct inode *dir,
607 + struct buffer_head **bh,struct dx_frame *frame,
608 + u32 hash, int *error)
611 + struct buffer_head *bh2;
614 + struct dx_map_entry *map;
615 + char *data1 = (*bh)->b_data, *data2, *data3;
617 + ext3_dirent *de, *de2;
619 + bh2 = ext3_append (handle, dir, &newblock, error);
624 + return (ext3_dirent *)bh2;
627 + BUFFER_TRACE(*bh, "get_write_access");
628 + ext3_journal_get_write_access(handle, *bh);
629 + BUFFER_TRACE(frame->bh, "get_write_access");
630 + ext3_journal_get_write_access(handle, frame->bh);
632 + data2 = bh2->b_data;
634 + map = kmalloc(sizeof(*map) * PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1,
637 + panic("no memory for do_split\n");
638 + count = dx_make_map((ext3_dirent *)data1, dir->i_sb->s_blocksize, map);
639 + split = count/2; // need to adjust to actual middle
640 + dx_sort_map (map, count);
641 + hash2 = map[split].hash;
642 + dxtrace(printk("Split block %i at %x, %i/%i\n",
643 + dx_get_block(frame->at), hash2, split, count-split));
645 + /* Fancy dance to stay within two buffers */
646 + de2 = dx_copy_dirents (data1, data2, map + split, count - split);
647 + data3 = (char *) de2 + le16_to_cpu(de2->rec_len);
648 + de = dx_copy_dirents (data1, data3, map, split);
649 + memcpy(data1, data3, (char *) de + le16_to_cpu(de->rec_len) - data3);
650 + de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
651 + de->rec_len = cpu_to_le16(data1 + dir->i_sb->s_blocksize - (char *)de);
652 + de2->rec_len = cpu_to_le16(data2 + dir->i_sb->s_blocksize-(char *)de2);
653 + dxtrace(dx_show_leaf((ext3_dirent *)data1, dir->i_sb->s_blocksize, 1));
654 + dxtrace(dx_show_leaf((ext3_dirent *)data2, dir->i_sb->s_blocksize, 1));
656 + /* Which block gets the new entry? */
662 + dx_insert_block(frame, hash2 + (hash2 == map[split-1].hash), newblock);
663 + ext3_journal_dirty_metadata (handle, bh2);
665 + ext3_journal_dirty_metadata (handle, frame->bh);
666 + dxtrace(dx_show_index ("frame", frame->entries));
676 @@ -255,118 +849,279 @@
679 struct inode *dir = dentry->d_parent->d_inode;
680 - const char *name = dentry->d_name.name;
681 - int namelen = dentry->d_name.len;
682 unsigned long offset;
683 - unsigned short rec_len;
684 struct buffer_head * bh;
685 - struct ext3_dir_entry_2 * de, * de1;
686 - struct super_block * sb;
688 + struct super_block * sb = dir->i_sb;
690 + unsigned short reclen = EXT3_DIR_REC_LEN(dentry->d_name.len);
693 + unsigned nlen, rlen;
698 + if (!dentry->d_name.len)
700 - bh = ext3_bread (handle, dir, 0, 0, &retval);
703 - rec_len = EXT3_DIR_REC_LEN(namelen);
705 - de = (struct ext3_dir_entry_2 *) bh->b_data;
707 - if ((char *)de >= sb->s_blocksize + bh->b_data) {
710 - bh = ext3_bread (handle, dir,
711 - offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
714 - if (dir->i_size <= offset) {
715 - if (dir->i_size == 0) {
718 + if (ext3_dx && is_dx(dir)) {
719 + struct dx_frame frames[2], *frame;
720 + struct dx_entry *entries, *at;
724 + hash = dx_hash(dentry->d_name.name, dentry->d_name.len);
725 + /* FIXME: do something if dx_probe() fails here */
726 + frame = dx_probe(dir, hash, frames);
727 + entries = frame->entries;
730 + if (!(bh = ext3_bread(handle,dir, dx_get_block(at), 0,&retval)))
733 + BUFFER_TRACE(bh, "get_write_access");
734 + ext3_journal_get_write_access(handle, bh);
736 + data1 = bh->b_data;
737 + de = (ext3_dirent *) data1;
738 + top = data1 + (0? 200: sb->s_blocksize);
739 + while ((char *) de < top)
741 + /* FIXME: check EEXIST and dir */
742 + nlen = EXT3_DIR_REC_LEN(de->name_len);
743 + rlen = le16_to_cpu(de->rec_len);
744 + if ((de->inode? rlen - nlen: rlen) >= reclen)
746 + de = (ext3_dirent *) ((char *) de + rlen);
748 + /* Block full, should compress but for now just split */
749 + dxtrace(printk("using %u of %u node entries\n",
750 + dx_get_count(entries), dx_get_limit(entries)));
751 + /* Need to split index? */
752 + if (dx_get_count(entries) == dx_get_limit(entries))
755 + unsigned icount = dx_get_count(entries);
756 + int levels = frame - frames;
757 + struct dx_entry *entries2;
758 + struct dx_node *node2;
759 + struct buffer_head *bh2;
760 + if (levels && dx_get_count(frames->entries) == dx_get_limit(frames->entries))
762 + bh2 = ext3_append (handle, dir, &newblock, &retval);
765 + node2 = (struct dx_node *)(bh2->b_data);
766 + entries2 = node2->entries;
767 + node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
768 + node2->fake.inode = 0;
769 + BUFFER_TRACE(frame->bh, "get_write_access");
770 + ext3_journal_get_write_access(handle, frame->bh);
773 + unsigned icount1 = icount/2, icount2 = icount - icount1;
774 + unsigned hash2 = dx_get_hash(entries + icount1);
775 + dxtrace(printk("Split index %i/%i\n", icount1, icount2));
777 + BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
778 + ext3_journal_get_write_access(handle, frames[0].bh);
780 + memcpy ((char *) entries2, (char *) (entries + icount1),
781 + icount2 * sizeof(struct dx_entry));
782 + dx_set_count (entries, icount1);
783 + dx_set_count (entries2, icount2);
784 + dx_set_limit (entries2, dx_node_limit(dir));
786 + /* Which index block gets the new entry? */
787 + if (at - entries >= icount1) {
788 + frame->at = at = at - entries - icount1 + entries2;
789 + frame->entries = entries = entries2;
790 + swap(frame->bh, bh2);
793 - ext3_debug ("creating next block\n");
795 - BUFFER_TRACE(bh, "get_write_access");
796 - ext3_journal_get_write_access(handle, bh);
797 - de = (struct ext3_dir_entry_2 *) bh->b_data;
799 - de->rec_len = le16_to_cpu(sb->s_blocksize);
800 - dir->u.ext3_i.i_disksize =
801 - dir->i_size = offset + sb->s_blocksize;
802 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
803 - ext3_mark_inode_dirty(handle, dir);
804 + dx_insert_block (frames + 0, hash2, newblock);
805 + dxtrace(dx_show_index ("node", frames[1].entries));
806 + dxtrace(dx_show_index ("node",
807 + ((struct dx_node *) bh2->b_data)->entries));
808 + ext3_journal_dirty_metadata(handle, bh2);
812 - ext3_debug ("skipping to next block\n");
814 - de = (struct ext3_dir_entry_2 *) bh->b_data;
815 + dxtrace(printk("Creating second level index...\n"));
816 + memcpy((char *) entries2, (char *) entries,
817 + icount * sizeof(struct dx_entry));
818 + dx_set_limit(entries2, dx_node_limit(dir));
821 + dx_set_count(entries, 1);
822 + dx_set_block(entries + 0, newblock);
823 + ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
825 + /* Add new access path frame */
826 + frame = frames + 1;
827 + frame->at = at = at - entries + entries2;
828 + frame->entries = entries = entries2;
830 + ext3_journal_get_write_access(handle, frame->bh);
832 + ext3_journal_dirty_metadata(handle, frames[0].bh);
834 - if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
839 - if (ext3_match (namelen, name, de)) {
840 + de = do_split(handle, dir, &bh, frame, hash, &retval);
841 + dx_release (frames);
844 + nlen = EXT3_DIR_REC_LEN(de->name_len);
845 + rlen = le16_to_cpu(de->rec_len);
849 + dx_release (frames);
853 + ext3_warning(sb, __FUNCTION__, "Directory index full!\n");
858 + dx_release (frames);
862 + blocks = dir->i_size >> sb->s_blocksize_bits;
863 + for (block = 0, offset = 0; block < blocks; block++) {
864 + bh = ext3_bread(handle, dir, block, 0, &retval);
867 + de = (ext3_dirent *)bh->b_data;
868 + top = bh->b_data + sb->s_blocksize - reclen;
869 + while ((char *) de <= top) {
870 + if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
875 + if (ext3_match(dentry->d_name.len,dentry->d_name.name,de)) {
879 - if ((le32_to_cpu(de->inode) == 0 &&
880 - le16_to_cpu(de->rec_len) >= rec_len) ||
881 - (le16_to_cpu(de->rec_len) >=
882 - EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
883 - BUFFER_TRACE(bh, "get_write_access");
884 - ext3_journal_get_write_access(handle, bh);
885 - /* By now the buffer is marked for journaling */
886 - offset += le16_to_cpu(de->rec_len);
887 - if (le32_to_cpu(de->inode)) {
888 - de1 = (struct ext3_dir_entry_2 *) ((char *) de +
889 - EXT3_DIR_REC_LEN(de->name_len));
891 - cpu_to_le16(le16_to_cpu(de->rec_len) -
892 - EXT3_DIR_REC_LEN(de->name_len));
893 - de->rec_len = cpu_to_le16(
894 - EXT3_DIR_REC_LEN(de->name_len));
897 - de->file_type = EXT3_FT_UNKNOWN;
899 - de->inode = cpu_to_le32(inode->i_ino);
900 - ext3_set_de_type(dir->i_sb, de, inode->i_mode);
903 - de->name_len = namelen;
904 - memcpy (de->name, name, namelen);
906 - * XXX shouldn't update any times until successful
907 - * completion of syscall, but too many callers depend
910 - * XXX similarly, too many callers depend on
911 - * ext3_new_inode() setting the times, but error
912 - * recovery deletes the inode, so the worst that can
913 - * happen is that the times are slightly out of date
914 - * and/or different from the directory change time.
916 - dir->i_mtime = dir->i_ctime = CURRENT_TIME;
917 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
918 - dir->i_version = ++event;
919 - ext3_mark_inode_dirty(handle, dir);
920 - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
921 - ext3_journal_dirty_metadata(handle, bh);
922 + nlen = EXT3_DIR_REC_LEN(de->name_len);
923 + rlen = le16_to_cpu(de->rec_len);
924 + if ((de->inode ? rlen - nlen: rlen) >= reclen)
926 + de = (ext3_dirent *)((char *)de + rlen);
929 + if (ext3_dx && blocks == 1 && test_opt(sb, INDEX))
930 + goto dx_make_index;
933 + bh = ext3_append(handle, dir, &block, &retval);
936 + de = (ext3_dirent *) bh->b_data;
938 + de->rec_len = cpu_to_le16(rlen = sb->s_blocksize);
943 + BUFFER_TRACE(bh, "get_write_access");
944 + ext3_journal_get_write_access(handle, bh);
945 + /* By now the buffer is marked for journaling */
947 + ext3_dirent *de1 = (ext3_dirent *)((char *)de + nlen);
948 + de1->rec_len = cpu_to_le16(rlen - nlen);
949 + de->rec_len = cpu_to_le16(nlen);
952 + de->file_type = EXT3_FT_UNKNOWN;
954 + de->inode = cpu_to_le32(inode->i_ino);
955 + ext3_set_de_type(dir->i_sb, de, inode->i_mode);
958 + de->name_len = dentry->d_name.len;
959 + memcpy (de->name, dentry->d_name.name, dentry->d_name.len);
961 + * XXX shouldn't update any times until successful
962 + * completion of syscall, but too many callers depend
965 + * XXX similarly, too many callers depend on
966 + * ext3_new_inode() setting the times, but error
967 + * recovery deletes the inode, so the worst that can
968 + * happen is that the times are slightly out of date
969 + * and/or different from the directory change time.
971 + dir->i_mtime = dir->i_ctime = CURRENT_TIME;
972 + ext3_update_dx_flag(dir);
973 + dir->i_version = ++event;
974 + ext3_mark_inode_dirty(handle, dir);
975 + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
976 + ext3_journal_dirty_metadata(handle, bh);
982 + struct buffer_head *bh2;
983 + struct dx_root *root;
984 + struct dx_frame frames[2], *frame;
985 + struct dx_entry *entries;
991 + dxtrace(printk("Creating index\n"));
992 + ext3_journal_get_write_access(handle, bh);
993 + root = (struct dx_root *) bh->b_data;
995 + EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
996 + bh2 = ext3_append (handle, dir, &block, &retval);
1003 - offset += le16_to_cpu(de->rec_len);
1004 - de = (struct ext3_dir_entry_2 *)
1005 - ((char *) de + le16_to_cpu(de->rec_len));
1006 + data1 = bh2->b_data;
1008 + /* The 0th block becomes the root, move the dirents out */
1009 + de = (struct ext3_dir_entry_2 *)&root->dotdot;
1010 + de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
1011 + len = ((char *) root) + sb->s_blocksize - (char *) de;
1012 + memcpy (data1, de, len);
1013 + de = (ext3_dirent *) data1;
1014 + top = data1 + len;
1015 + while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top)
1017 + de->rec_len = cpu_to_le16(data1 + sb->s_blocksize - (char *)de);
1018 + /* Initialize the root; the dot dirents already exist */
1019 + de = (ext3_dirent *) (&root->dotdot);
1020 + de->rec_len = cpu_to_le16(sb->s_blocksize-EXT3_DIR_REC_LEN(2));
1021 + memset (&root->info, 0, sizeof(root->info));
1022 + root->info.info_length = sizeof(root->info);
1023 + entries = root->entries;
1024 + dx_set_block (entries, 1);
1025 + dx_set_count (entries, 1);
1026 + dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
1028 + /* Initialize as for dx_probe */
1029 + hash = dx_hash (dentry->d_name.name, dentry->d_name.len);
1031 + frame->entries = entries;
1032 + frame->at = entries;
1035 + de = do_split(handle,dir, &bh, frame, hash, &retval);
1036 + dx_release (frames);
1039 + nlen = EXT3_DIR_REC_LEN(de->name_len);
1040 + rlen = le16_to_cpu(de->rec_len);
1052 @@ -451,7 +1212,8 @@
1053 struct inode * inode;
1056 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1057 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1058 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1060 return PTR_ERR(handle);
1062 @@ -478,7 +1240,8 @@
1063 struct inode *inode;
1066 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1067 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1068 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1070 return PTR_ERR(handle);
1072 @@ -507,7 +1270,8 @@
1073 if (dir->i_nlink >= EXT3_LINK_MAX)
1076 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1077 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1078 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1080 return PTR_ERR(handle);
1082 @@ -550,7 +1320,7 @@
1086 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1087 + ext3_update_dx_flag(dir);
1088 ext3_mark_inode_dirty(handle, dir);
1089 d_instantiate(dentry, inode);
1091 @@ -832,7 +1596,7 @@
1092 ext3_mark_inode_dirty(handle, inode);
1094 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1095 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1096 + ext3_update_dx_flag(dir);
1097 ext3_mark_inode_dirty(handle, dir);
1100 @@ -878,7 +1642,7 @@
1103 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1104 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1105 + ext3_update_dx_flag(dir);
1106 ext3_mark_inode_dirty(handle, dir);
1108 if (!inode->i_nlink)
1109 @@ -904,7 +1668,8 @@
1110 if (l > dir->i_sb->s_blocksize)
1111 return -ENAMETOOLONG;
1113 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
1114 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1115 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
1117 return PTR_ERR(handle);
1119 @@ -959,7 +1724,8 @@
1120 if (inode->i_nlink >= EXT3_LINK_MAX)
1123 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
1124 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1125 + EXT3_INDEX_EXTRA_TRANS_BLOCKS);
1127 return PTR_ERR(handle);
1129 @@ -995,7 +1761,8 @@
1131 old_bh = new_bh = dir_bh = NULL;
1133 - handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
1134 + handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
1135 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
1137 return PTR_ERR(handle);
1139 @@ -1069,14 +1837,37 @@
1143 - ext3_delete_entry(handle, old_dir, old_de, old_bh);
1144 + if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
1145 + old_de->name_len != old_dentry->d_name.len ||
1146 + strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
1147 + (retval = ext3_delete_entry(handle, old_dir,
1148 + old_de, old_bh)) == -ENOENT) {
1149 + /* old_de could have moved from under us during htree split, so
1150 + * make sure that we are deleting the right entry. We might
1151 + * also be pointing to a stale entry in the unused part of
1152 + * old_bh so just checking inum and the name isn't enough. */
1153 + struct buffer_head *old_bh2;
1154 + struct ext3_dir_entry_2 *old_de2;
1156 + old_bh2 = ext3_find_entry(old_dentry, &old_de2);
1158 + retval = ext3_delete_entry(handle, old_dir,
1159 + old_de2, old_bh2);
1164 + ext3_warning(old_dir->i_sb, "ext3_rename",
1165 + "Deleting old file (%lu), %d, error=%d",
1166 + old_dir->i_ino, old_dir->i_nlink, retval);
1170 new_inode->i_nlink--;
1171 new_inode->i_ctime = CURRENT_TIME;
1173 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1174 - old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1175 + ext3_update_dx_flag(old_dir);
1177 BUFFER_TRACE(dir_bh, "get_write_access");
1178 ext3_journal_get_write_access(handle, dir_bh);
1179 @@ -1089,7 +1856,7 @@
1180 new_inode->i_nlink--;
1183 - new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1184 + ext3_update_dx_flag(new_dir);
1185 ext3_mark_inode_dirty(handle, new_dir);
1188 --- ./include/linux/ext3_fs.h 2002/03/05 06:18:59 2.1
1189 +++ ./include/linux/ext3_fs.h 2002/03/05 06:26:56
1191 #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */
1192 #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
1193 #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
1194 +#define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */
1196 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1197 #ifndef _LINUX_EXT2_FS_H
1198 @@ -575,6 +576,24 @@
1199 #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
1200 #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
1203 + * Hash Tree Directory indexing
1204 + * (c) Daniel Phillips, 2001
1207 +#define CONFIG_EXT3_INDEX
1209 +#ifdef CONFIG_EXT3_INDEX
1210 + enum {ext3_dx = 1};
1211 + #define is_dx(dir) (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)
1212 +#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
1213 +#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
1215 + enum {ext3_dx = 0};
1216 + #define is_dx(dir) 0
1217 +#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
1218 +#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
1223 --- ./include/linux/ext3_jbd.h 2002/03/05 06:18:59 2.1
1224 +++ ./include/linux/ext3_jbd.h 2002/03/05 06:33:54
1227 #define EXT3_RESERVE_TRANS_BLOCKS 12
1229 +#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
1232 ext3_mark_iloc_dirty(handle_t *handle,
1233 struct inode *inode,