1 --- ./fs/ext3/dir.c 2002/03/05 06:18:59 2.1
2 +++ ./fs/ext3/dir.c 2002/03/05 06:26:56
4 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
7 -static int ext3_readdir(struct file *, void *, filldir_t);
8 +int ext3_readdir(struct file *, void *, filldir_t);
10 struct file_operations ext3_dir_operations = {
11 read: generic_read_dir,
13 return error_msg == NULL ? 1 : 0;
16 -static int ext3_readdir(struct file * filp,
17 +int ext3_readdir(struct file * filp,
18 void * dirent, filldir_t filldir)
21 --- ./fs/ext3/super.c 2002/03/05 06:18:59 2.1
22 +++ ./fs/ext3/super.c 2002/03/05 06:26:56
24 "EXT3 Check option not supported\n");
27 + else if (!strcmp (this_char, "index"))
28 +#ifdef CONFIG_EXT3_INDEX
29 + set_opt (*mount_options, INDEX);
31 + printk("EXT3 index option not supported\n");
33 else if (!strcmp (this_char, "debug"))
34 set_opt (*mount_options, DEBUG);
35 else if (!strcmp (this_char, "errors")) {
37 EXT3_BLOCKS_PER_GROUP(sb),
38 EXT3_INODES_PER_GROUP(sb),
41 + if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
42 + set_opt (EXT3_SB(sb)->s_mount_opt, INDEX);
44 printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ",
46 if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
47 --- ./fs/ext3/namei.c 2002/03/05 06:18:59 2.1
48 +++ ./fs/ext3/namei.c 2002/03/06 00:13:18
50 * David S. Miller (davem@caip.rutgers.edu), 1995
51 * Directory entry file type support and forward compatibility hooks
52 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
53 + * Hash Tree Directory indexing (c)
54 + * Daniel Phillips, 2001
55 + * Hash Tree Directory indexing porting
56 + * Christopher Li, 2002
61 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
62 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
64 +void ext3_add_compat_feature (struct super_block *sb, unsigned feature)
66 + if (!EXT3_HAS_COMPAT_FEATURE(sb, feature))
69 + ext3_update_dynamic_rev(sb);
70 + EXT3_SET_COMPAT_FEATURE(sb, feature);
71 + ext3_write_super(sb);
76 +static struct buffer_head *ext3_append (handle_t *handle,
77 + struct inode *inode,
78 + u32 *block, int *err)
80 + struct buffer_head *bh;
81 + *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
82 + if((bh = ext3_bread (handle,inode, *block, 1, err))) {
83 + inode->i_size += inode->i_sb->s_blocksize;
84 + ext3_journal_get_write_access(handle,bh);
90 +#define assert(test) do if (!(test)) BUG(); while (0)
94 +#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
97 +typedef struct { u32 v; } le_u32;
98 +typedef struct { u16 v; } le_u16;
100 +#define dxtrace_on(command) command
101 +#define dxtrace_off(command)
102 +#define dxtrace dxtrace_off
112 +struct dx_countlimit
125 + * dx_root_info is laid out so that if it should somehow get overlaid by a
126 + * dirent the two low bits of the hash version will be zero. Therefore, the
127 + * hash version mod 4 should never be 0. Sincerely, the paranoia department.
132 + struct fake_dirent fake1;
134 + struct fake_dirent fake2;
136 + struct dx_root_info
138 + le_u32 reserved_zero;
139 + u8 hash_version; /* 0 now, 1 at release */
140 + u8 info_length; /* 8 */
141 + u8 indirect_levels;
145 + struct dx_entry entries[0];
150 + struct fake_dirent fake;
151 + struct dx_entry entries[0];
157 + struct buffer_head *bh;
158 + struct dx_entry *entries;
159 + struct dx_entry *at;
168 +typedef struct ext3_dir_entry_2 ext3_dirent;
169 +static inline unsigned dx_get_block (struct dx_entry *entry);
170 +static void dx_set_block (struct dx_entry *entry, unsigned value);
171 +static inline unsigned dx_get_hash (struct dx_entry *entry);
172 +static void dx_set_hash (struct dx_entry *entry, unsigned value);
173 +static unsigned dx_get_count (struct dx_entry *entries);
174 +static unsigned dx_get_limit (struct dx_entry *entries);
175 +static void dx_set_count (struct dx_entry *entries, unsigned value);
176 +static void dx_set_limit (struct dx_entry *entries, unsigned value);
177 +static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
178 +static unsigned dx_node_limit (struct inode *dir);
179 +static unsigned dx_hack_hash (const u8 *name, int len);
180 +static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame);
181 +static void dx_release (struct dx_frame *frames);
182 +static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]);
183 +static void dx_sort_map(struct dx_map_entry *map, unsigned count);
184 +static ext3_dirent *dx_copy_dirents (char *from, char *to,
185 + struct dx_map_entry *map, int count);
186 +static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
189 +#ifdef CONFIG_EXT3_INDEX
191 + * Future: use high four bits of block for coalesce-on-delete flags
192 + * Mask them off for now.
195 +static inline unsigned dx_get_block (struct dx_entry *entry)
197 + return le32_to_cpu(entry->block.v) & 0x0fffffff;
200 +static inline void dx_set_block (struct dx_entry *entry, unsigned value)
202 + entry->block.v = cpu_to_le32(value);
205 +static inline unsigned dx_get_hash (struct dx_entry *entry)
207 + return le32_to_cpu(entry->hash.v);
210 +static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
212 + entry->hash.v = cpu_to_le32(value);
215 +static inline unsigned dx_get_count (struct dx_entry *entries)
217 + return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
220 +static inline unsigned dx_get_limit (struct dx_entry *entries)
222 + return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
225 +static inline void dx_set_count (struct dx_entry *entries, unsigned value)
227 + ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
230 +static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
232 + ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
235 +static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
237 + unsigned entry_space = dir->i_sb->s_blocksize - 24 - infosize;
238 + return 0? 20: entry_space / sizeof(struct dx_entry);
241 +static inline unsigned dx_node_limit (struct inode *dir)
243 + unsigned entry_space = dir->i_sb->s_blocksize - sizeof(struct fake_dirent);
244 + return 0? 22: entry_space / sizeof(struct dx_entry);
247 +/* Hash function - not bad, but still looking for an ideal default */
249 +static unsigned dx_hack_hash (const u8 *name, int len)
251 + u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
254 + u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
255 + if (hash & 0x80000000) hash -= 0x7fffffff;
259 + return 80; /* FIXME: for test only */
263 +#define dx_hash(s,n) (dx_hack_hash(s,n) << 1)
268 +static void dx_show_index (char * label, struct dx_entry *entries)
270 + int i, n = dx_get_count (entries);
271 + printk("%s index ", label);
272 + for (i = 0; i < n; i++)
274 + printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
286 +static struct stats dx_show_leaf (ext3_dirent *de, int size, int show_names)
288 + unsigned names = 0, space = 0;
289 + char *base = (char *) de;
291 + while ((char *) de < base + size)
297 + int len = de->name_len;
298 + char *name = de->name;
299 + while (len--) printk("%c", *name++);
300 + printk(":%x.%u ", dx_hash (de->name, de->name_len), ((char *) de - base));
302 + space += EXT3_DIR_REC_LEN(de->name_len);
305 + de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
307 + printk("(%i)\n", names);
308 + return (struct stats) { names, space, 1 };
311 +struct stats dx_show_entries (struct inode *dir, struct dx_entry *entries, int levels)
313 + unsigned blocksize = dir->i_sb->s_blocksize;
314 + unsigned count = dx_get_count (entries), names = 0, space = 0, i;
315 + unsigned bcount = 0;
316 + struct buffer_head *bh;
318 + printk("%i indexed blocks...\n", count);
319 + for (i = 0; i < count; i++, entries++)
321 + u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
322 + u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
323 + struct stats stats;
324 + printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
325 + if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
327 + dx_show_entries (dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
328 + dx_show_leaf ((ext3_dirent *) bh->b_data, blocksize, 0);
329 + names += stats.names;
330 + space += stats.space;
331 + bcount += stats.bcount;
335 + printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
336 + names, space/bcount,(space/bcount)*100/blocksize);
337 + return (struct stats) { names, space, bcount};
340 +static void dx_show_buckets (struct inode *dir)
342 + struct buffer_head *bh;
343 + struct dx_root *root;
345 + if (!(bh = ext3_bread (NULL,dir, 0, 0,&err))) return;
346 + root = (struct dx_root *) bh->b_data;
347 + dx_show_entries (dir, root->entries, root->info.indirect_levels);
351 +ssize_t hack_show_dir (struct file * filp, void * dirent, filldir_t filldir)
353 + if (is_dx (filp->f_dentry->d_inode) && !filp->f_pos)
354 + dx_show_buckets (filp->f_dentry->d_inode);
355 + return ext3_readdir(filp,dirent,filldir);
359 + * Probe for a directory leaf block to search
362 +static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame)
364 + unsigned count, indirect;
365 + struct dx_entry *at, *entries, *p, *q, *m;
366 + struct dx_root *root;
367 + struct buffer_head *bh;
369 + if (!(bh = ext3_bread (NULL,dir, 0, 0,&err)))
371 + root = (struct dx_root *) bh->b_data;
372 + if (root->info.hash_version > 0 || root->info.unused_flags & 1)
374 + if ((indirect = root->info.indirect_levels) > 1)
376 + entries = (struct dx_entry *) (((char *) &root->info) + root->info.info_length);
377 + assert (dx_get_limit(entries) == dx_root_limit(dir, root->info.info_length));
378 + dxtrace (printk("Look up %x", hash));
381 + count = dx_get_count(entries);
382 + assert (count && count <= dx_get_limit(entries));
384 + q = entries + count - 1;
388 + dxtrace(printk("."));
389 + if (dx_get_hash(m) > hash)
395 + if (0) // linear search cross check
397 + unsigned n = count - 1;
401 + dxtrace(printk(","));
402 + if (dx_get_hash(++at) > hash)
408 + assert (at == p - 1);
412 + dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
414 + frame->entries = entries;
416 + if (!indirect--) return frame;
417 + if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0,&err)))
419 + at = entries = ((struct dx_node *) bh->b_data)->entries;
420 + assert (dx_get_limit(entries) == dx_node_limit (dir));
429 +static void dx_release (struct dx_frame *frames)
431 + if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
432 + brelse (frames[1].bh);
433 + brelse (frames[0].bh);
437 + * Directory block splitting, compacting
440 +static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[])
443 + char *base = (char *) de;
444 + while ((char *) de < base + size)
446 + map[count].hash = dx_hash (de->name, de->name_len);
447 + map[count].offs = (u32) ((char *) de - base);
448 + de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
454 +static void dx_sort_map (struct dx_map_entry *map, unsigned count)
456 + struct dx_map_entry *p, *q, *top = map + count - 1;
458 + /* Combsort until bubble sort doesn't suck */
461 + count = count*10/13;
462 + if (count - 9 < 2) /* 9, 10 -> 11 */
464 + for (p = top, q = p - count; q >= map; p--, q--)
465 + if (p->hash < q->hash)
468 + /* Garden variety bubble sort */
474 + if (q[1].hash >= q[0].hash)
482 +static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block)
484 + struct dx_entry *entries = frame->entries, *at = frame->at;
485 + assert (dx_get_count (entries) < dx_get_limit (entries));
486 + memmove (at + 2, at+1, (char *) (entries + dx_get_count(entries)) - (char *) (at));
487 + dx_set_hash(at + 1, hash);
488 + dx_set_block(at + 1, block);
489 + dx_set_count(entries, dx_get_count(entries) + 1);
494 * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
500 + * p is at least 6 bytes before the end of page
502 +static inline ext3_dirent *ext3_next_entry(ext3_dirent *p)
504 + return (ext3_dirent *)((char*)p + le16_to_cpu(p->rec_len));
511 * finds an entry in the specified directory with the wanted name. It
513 * The returned buffer_head has ->b_count elevated. The caller is expected
514 * to brelse() it when appropriate.
518 static struct buffer_head * ext3_find_entry (struct dentry *dentry,
519 struct ext3_dir_entry_2 ** res_dir)
521 @@ -119,10 +564,76 @@
524 struct inode *dir = dentry->d_parent->d_inode;
527 + unsigned blocksize;
528 + ext3_dirent *de, *top;
532 + blocksize = sb->s_blocksize;
533 + namelen = dentry->d_name.len;
534 + name = dentry->d_name.name;
535 + if (namelen > EXT3_NAME_LEN)
537 + if (ext3_dx && is_dx(dir)) {
538 + u32 hash = dx_hash (name, namelen);
539 + struct dx_frame frames[2], *frame;
540 + if (!(frame = dx_probe (dir, hash, frames)))
543 + block = dx_get_block(frame->at);
544 + if (!(bh = ext3_bread (NULL,dir, block, 0, &err)))
546 + de = (struct ext3_dir_entry_2 *) bh->b_data;
547 + top = (struct ext3_dir_entry_2 *) ((char *) de + blocksize
548 + - EXT3_DIR_REC_LEN(0));
549 + for (; de < top; de = ext3_next_entry(de))
550 + if (ext3_match (namelen, name, de)) {
551 + if (!ext3_check_dir_entry("ext3_find_entry",
553 + (block<<EXT3_BLOCK_SIZE_BITS(sb))
554 + +((char *)de - bh->b_data))) {
562 + /* Same hash continues in next block? Search on. */
563 + if (++(frame->at) == frame->entries + dx_get_count(frame->entries))
565 + struct buffer_head *bh2;
566 + if (frame == frames)
568 + if (++(frames->at) == frames->entries + dx_get_count(frames->entries))
570 + /* should omit read if not continued */
571 + if (!(bh2 = ext3_bread (NULL, dir,
572 + dx_get_block(frames->at),
575 + brelse (frame->bh);
577 + frame->at = frame->entries = ((struct dx_node *) bh2->b_data)->entries;
578 + /* Subtle: the 0th entry has the count, find the hash in frame above */
579 + if ((dx_get_hash(frames->at) & -2) == hash)
583 + if ((dx_get_hash(frame->at) & -2) == hash)
586 + dxtrace(printk("%s not found\n", name));
587 + dx_release (frames);
590 + dx_release (frames);
595 nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
596 start = dir->u.ext3_i.i_dir_start_lookup;
597 if (start >= nblocks)
599 de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
602 +static ext3_dirent *
603 +dx_copy_dirents (char *from, char *to, struct dx_map_entry *map, int count)
605 + unsigned rec_len = 0;
608 + ext3_dirent *de = (ext3_dirent *) (from + map->offs);
609 + rec_len = EXT3_DIR_REC_LEN(de->name_len);
610 + memcpy (to, de, rec_len);
611 + ((ext3_dirent *) to)->rec_len = rec_len;
615 + return (ext3_dirent *) (to - rec_len);
618 +#ifdef CONFIG_EXT3_INDEX
619 +static ext3_dirent *do_split(handle_t *handle, struct inode *dir,
620 + struct buffer_head **bh,struct dx_frame *frame,
621 + u32 hash, int *error)
623 + unsigned blocksize = dir->i_sb->s_blocksize;
624 + unsigned count, continued;
625 + struct buffer_head *bh2;
627 + unsigned MAX_DX_MAP = PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1;
629 + struct dx_map_entry map[MAX_DX_MAP];
630 + char *data1 = (*bh)->b_data, *data2, *data3;
632 + ext3_dirent *de, *de2;
634 + bh2 = ext3_append (handle, dir, &newblock, error);
639 + return (ext3_dirent *)bh2;
642 + BUFFER_TRACE(*bh, "get_write_access");
643 + ext3_journal_get_write_access(handle, *bh);
644 + BUFFER_TRACE(frame->bh, "get_write_access");
645 + ext3_journal_get_write_access(handle, frame->bh);
647 + data2 = bh2->b_data;
649 + count = dx_make_map ((ext3_dirent *) data1, blocksize, map);
650 + split = count/2; // need to adjust to actual middle
651 + dx_sort_map (map, count);
652 + hash2 = map[split].hash;
653 + continued = hash2 == map[split - 1].hash;
654 + dxtrace(printk("Split block %i at %x, %i/%i\n",
655 + dx_get_block(frame->at), hash2, split, count-split));
657 + /* Fancy dance to stay within two buffers */
658 + de2 = dx_copy_dirents (data1, data2, map + split, count - split);
659 + data3 = (char *) de2 + de2->rec_len;
660 + de = dx_copy_dirents (data1, data3, map, split);
661 + memcpy(data1, data3, (char *) de + de->rec_len - data3);
662 + de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
663 + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
664 + de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
665 + dxtrace(dx_show_leaf ((ext3_dirent *) data1, blocksize, 1));
666 + dxtrace(dx_show_leaf ((ext3_dirent *) data2, blocksize, 1));
668 + /* Which block gets the new entry? */
674 + dx_insert_block (frame, hash2 + continued, newblock);
675 + ext3_journal_dirty_metadata (handle, bh2);
677 + ext3_journal_dirty_metadata (handle, frame->bh);
678 + dxtrace(dx_show_index ("frame", frame->entries));
689 * AKPM: the journalling code here looks wrong on the error paths
692 static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
695 @@ -258,117 +852,284 @@
696 const char *name = dentry->d_name.name;
697 int namelen = dentry->d_name.len;
698 unsigned long offset;
699 - unsigned short rec_len;
700 struct buffer_head * bh;
701 struct ext3_dir_entry_2 * de, * de1;
702 - struct super_block * sb;
703 + struct super_block * sb = dir->i_sb;
705 + unsigned short reclen = EXT3_DIR_REC_LEN(namelen);
708 + unsigned blocksize = sb->s_blocksize;
709 + unsigned nlen, rlen;
715 - bh = ext3_bread (handle, dir, 0, 0, &retval);
718 - rec_len = EXT3_DIR_REC_LEN(namelen);
720 - de = (struct ext3_dir_entry_2 *) bh->b_data;
722 - if ((char *)de >= sb->s_blocksize + bh->b_data) {
725 - bh = ext3_bread (handle, dir,
726 - offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
729 - if (dir->i_size <= offset) {
730 - if (dir->i_size == 0) {
733 + if (ext3_dx && is_dx(dir))
735 + struct dx_frame frames[2], *frame;
736 + struct dx_entry *entries, *at;
740 + hash = dx_hash (name, namelen);
741 + frame = dx_probe (dir, hash, frames); // do something if null
742 + entries = frame->entries;
745 + if (!(bh = ext3_bread (handle,dir, dx_get_block(frame->at), 0,&retval)))
748 + BUFFER_TRACE(bh, "get_write_access");
749 + ext3_journal_get_write_access(handle, bh);
751 + data1 = bh->b_data;
752 + de = (ext3_dirent *) data1;
753 + top = data1 + (0? 200: blocksize);
754 + while ((char *) de < top)
756 + /* FIXME: check EEXIST and dir */
757 + nlen = EXT3_DIR_REC_LEN(de->name_len);
758 + rlen = le16_to_cpu(de->rec_len);
759 + if ((de->inode? rlen - nlen: rlen) >= reclen)
761 + de = (ext3_dirent *) ((char *) de + rlen);
763 + /* Block full, should compress but for now just split */
764 + dxtrace(printk("using %u of %u node entries\n",
765 + dx_get_count(entries), dx_get_limit(entries)));
766 + /* Need to split index? */
767 + if (dx_get_count(entries) == dx_get_limit(entries))
770 + unsigned icount = dx_get_count(entries);
772 + int levels = frame - frames;
773 + struct dx_entry *entries2;
774 + struct buffer_head *bh2;
775 + if (levels && dx_get_count(frames->entries) == dx_get_limit(frames->entries))
777 + bh2 = ext3_append (handle, dir, &newblock, &retval);
780 + idata2 = bh2->b_data;
781 + entries2 = ((struct dx_node *) idata2)->entries;
782 + ((struct dx_node *) idata2)->fake.rec_len = cpu_to_le16(blocksize);
783 + /* fake.inode already 0 */
784 + /* Seems that is not true. We still need to set inode = 0 -Chrisl*/
785 + ((struct dx_node *) idata2)->fake.inode = 0;
786 + BUFFER_TRACE(frame->bh, "get_write_access");
787 + ext3_journal_get_write_access(handle, frame->bh);
790 + unsigned icount1 = icount/2, icount2 = icount - icount1;
791 + unsigned hash2 = dx_get_hash(entries + icount1);
792 + dxtrace(printk("Split index %i/%i\n", icount1, icount2));
794 + BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
795 + ext3_journal_get_write_access(handle, frames[0].bh);
797 + memcpy ((char *) entries2, (char *) (entries + icount1),
798 + icount2 * sizeof(struct dx_entry));
799 + dx_set_count (entries, icount1);
800 + dx_set_count (entries2, icount2);
801 + dx_set_limit (entries2, dx_node_limit(dir));
803 + /* Which index block gets the new entry? */
804 + if (at - entries > icount1) {
805 + frame->at = at = at - entries - icount1 + entries2;
806 + frame->entries = entries = entries2;
807 + swap(frame->bh, bh2);
810 - ext3_debug ("creating next block\n");
812 - BUFFER_TRACE(bh, "get_write_access");
813 - ext3_journal_get_write_access(handle, bh);
814 - de = (struct ext3_dir_entry_2 *) bh->b_data;
816 - de->rec_len = le16_to_cpu(sb->s_blocksize);
817 - dir->u.ext3_i.i_disksize =
818 - dir->i_size = offset + sb->s_blocksize;
819 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
820 - ext3_mark_inode_dirty(handle, dir);
821 + dx_insert_block (frames + 0, hash2, newblock);
822 + dxtrace(dx_show_index ("node", frames[1].entries));
823 + dxtrace(dx_show_index ("node",
824 + ((struct dx_node *) bh2->b_data)->entries));
825 + ext3_journal_dirty_metadata(handle, bh2);
829 - ext3_debug ("skipping to next block\n");
831 - de = (struct ext3_dir_entry_2 *) bh->b_data;
832 + dxtrace(printk("Creating second level index...\n"));
833 + memcpy((char *) entries2, (char *) entries,
834 + icount * sizeof(struct dx_entry));
835 + dx_set_limit(entries2, dx_node_limit(dir));
838 + dx_set_count(entries, 1);
839 + dx_set_block(entries + 0, newblock);
840 + ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
842 + /* Add new access path frame */
843 + frame = frames + 1;
844 + frame->at = at = at - entries + entries2;
845 + frame->entries = entries = entries2;
847 + ext3_journal_get_write_access(handle, frame->bh);
849 + ext3_journal_dirty_metadata(handle, frames[0].bh);
851 - if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
856 - if (ext3_match (namelen, name, de)) {
857 + de = do_split(handle, dir, &bh, frame, hash, &retval);
858 + dx_release (frames);
861 + nlen = EXT3_DIR_REC_LEN(de->name_len);
862 + rlen = le16_to_cpu(de->rec_len);
866 + dx_release (frames);
870 + ext3_warning(sb, __FUNCTION__, "Directory index full!\n");
875 + dx_release (frames);
878 + block = offset = 0;
879 + while (offset < dir->i_size) {
880 + bh = ext3_bread (handle, dir, block, 0, &retval);
883 + de = (struct ext3_dir_entry_2 *) bh->b_data;
884 + top = bh->b_data+blocksize-reclen;
885 + while ((char *) de <= top) {
887 + if (!ext3_check_dir_entry ("ext3_add_entry", dir, de,
892 + if (ext3_match (namelen, name, de)) {
896 - if ((le32_to_cpu(de->inode) == 0 &&
897 - le16_to_cpu(de->rec_len) >= rec_len) ||
898 - (le16_to_cpu(de->rec_len) >=
899 - EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
900 - BUFFER_TRACE(bh, "get_write_access");
901 - ext3_journal_get_write_access(handle, bh);
902 - /* By now the buffer is marked for journaling */
903 - offset += le16_to_cpu(de->rec_len);
904 - if (le32_to_cpu(de->inode)) {
905 - de1 = (struct ext3_dir_entry_2 *) ((char *) de +
906 - EXT3_DIR_REC_LEN(de->name_len));
908 - cpu_to_le16(le16_to_cpu(de->rec_len) -
909 - EXT3_DIR_REC_LEN(de->name_len));
910 - de->rec_len = cpu_to_le16(
911 - EXT3_DIR_REC_LEN(de->name_len));
914 - de->file_type = EXT3_FT_UNKNOWN;
916 - de->inode = cpu_to_le32(inode->i_ino);
917 - ext3_set_de_type(dir->i_sb, de, inode->i_mode);
920 - de->name_len = namelen;
921 - memcpy (de->name, name, namelen);
923 - * XXX shouldn't update any times until successful
924 - * completion of syscall, but too many callers depend
927 - * XXX similarly, too many callers depend on
928 - * ext3_new_inode() setting the times, but error
929 - * recovery deletes the inode, so the worst that can
930 - * happen is that the times are slightly out of date
931 - * and/or different from the directory change time.
933 - dir->i_mtime = dir->i_ctime = CURRENT_TIME;
934 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
935 - ext3_mark_inode_dirty(handle, dir);
936 - dir->i_version = ++event;
937 - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
938 - ext3_journal_dirty_metadata(handle, bh);
939 + nlen = EXT3_DIR_REC_LEN(de->name_len);
940 + rlen = le16_to_cpu(de->rec_len);
941 + if ((de->inode? rlen - nlen: rlen) >= reclen)
943 + de = (struct ext3_dir_entry_2 *) ((char *) de + rlen);
946 + if (ext3_dx && dir->i_size==blocksize && test_opt(sb, INDEX))
947 + goto dx_make_index;
950 + bh = ext3_append(handle, dir, &block, &retval);
953 + de = (struct ext3_dir_entry_2 *) bh->b_data;
955 + de->rec_len = cpu_to_le16(rlen = blocksize);
960 + BUFFER_TRACE(bh, "get_write_access");
961 + ext3_journal_get_write_access(handle, bh);
962 + /* By now the buffer is marked for journaling */
964 + de1 = (struct ext3_dir_entry_2 *) ((char *) de + nlen);
965 + de1->rec_len = cpu_to_le16(rlen - nlen);
966 + de->rec_len = cpu_to_le16(nlen);
969 + de->file_type = EXT3_FT_UNKNOWN;
971 + de->inode = cpu_to_le32(inode->i_ino);
972 + ext3_set_de_type(dir->i_sb, de, inode->i_mode);
975 + de->name_len = namelen;
976 + memcpy (de->name, name, namelen);
978 + * XXX shouldn't update any times until successful
979 + * completion of syscall, but too many callers depend
982 + * XXX similarly, too many callers depend on
983 + * ext3_new_inode() setting the times, but error
984 + * recovery deletes the inode, so the worst that can
985 + * happen is that the times are slightly out of date
986 + * and/or different from the directory change time.
988 + dir->i_mtime = dir->i_ctime = CURRENT_TIME;
989 + /* EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; */
990 + ext3_mark_inode_dirty(handle, dir);
991 + dir->i_version = ++event;
992 + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
993 + ext3_journal_dirty_metadata(handle, bh);
999 + struct buffer_head *bh2;
1000 + struct dx_root *root;
1001 + struct dx_frame frames[2], *frame;
1002 + struct dx_entry *entries;
1003 + struct ext3_dir_entry_2 *de2;
1008 + dxtrace(printk("Creating index\n"));
1009 + ext3_journal_get_write_access(handle, bh);
1010 + root = (struct dx_root *) bh->b_data;
1012 + ext3_add_compat_feature (sb, EXT3_FEATURE_COMPAT_DIR_INDEX);
1013 + EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
1014 + bh2 = ext3_append (handle, dir, &block, &retval);
1021 - offset += le16_to_cpu(de->rec_len);
1022 - de = (struct ext3_dir_entry_2 *)
1023 - ((char *) de + le16_to_cpu(de->rec_len));
1024 + data1 = bh2->b_data;
1026 + /* The 0th block becomes the root, move the dirents out */
1027 + de = (ext3_dirent *) &root->info;
1028 + len = ((char *) root) + blocksize - (char *) de;
1029 + memcpy (data1, de, len);
1030 + de = (ext3_dirent *) data1;
1031 + top = data1 + len;
1032 + while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top)
1034 + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
1035 + /* Initialize the root; the dot dirents already exist */
1036 + de = (ext3_dirent *) (&root->fake2);
1037 + de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
1038 + memset (&root->info, 0, sizeof(root->info));
1039 + root->info.info_length = sizeof(root->info);
1040 + entries = root->entries;
1041 + dx_set_block (entries, 1);
1042 + dx_set_count (entries, 1);
1043 + dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
1045 + /* Initialize as for dx_probe */
1046 + hash = dx_hash (name, namelen);
1048 + frame->entries = entries;
1049 + frame->at = entries;
1052 + de = do_split(handle,dir, &bh, frame, hash, &retval);
1053 + dx_release (frames);
1056 + nlen = EXT3_DIR_REC_LEN(de->name_len);
1057 + rlen = le16_to_cpu(de->rec_len);
1070 * ext3_delete_entry deletes a directory entry by merging it with the
1072 @@ -451,7 +1212,8 @@
1073 struct inode * inode;
1076 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1077 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1078 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1080 return PTR_ERR(handle);
1082 @@ -478,7 +1240,8 @@
1083 struct inode *inode;
1086 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1087 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1088 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1090 return PTR_ERR(handle);
1092 @@ -507,7 +1270,8 @@
1093 if (dir->i_nlink >= EXT3_LINK_MAX)
1096 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1097 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1098 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1100 return PTR_ERR(handle);
1102 @@ -832,7 +1596,7 @@
1103 ext3_mark_inode_dirty(handle, inode);
1105 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1106 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1107 + // EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
1108 ext3_mark_inode_dirty(handle, dir);
1111 @@ -878,7 +1642,7 @@
1114 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1115 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1116 + // EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
1117 ext3_mark_inode_dirty(handle, dir);
1119 if (!inode->i_nlink)
1120 @@ -904,7 +1668,8 @@
1121 if (l > dir->i_sb->s_blocksize)
1122 return -ENAMETOOLONG;
1124 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
1125 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1126 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
1128 return PTR_ERR(handle);
1130 @@ -959,7 +1724,8 @@
1131 if (inode->i_nlink >= EXT3_LINK_MAX)
1134 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
1135 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1136 + EXT3_INDEX_EXTRA_TRANS_BLOCKS);
1138 return PTR_ERR(handle);
1140 @@ -995,7 +1761,8 @@
1142 old_bh = new_bh = dir_bh = NULL;
1144 - handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
1145 + handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
1146 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
1148 return PTR_ERR(handle);
1150 @@ -1077,7 +1844,7 @@
1151 new_inode->i_ctime = CURRENT_TIME;
1153 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1154 - old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1155 + // EXT3_I(old_dir)->i_flags &= ~EXT3_INDEX_FL;
1157 BUFFER_TRACE(dir_bh, "get_write_access");
1158 ext3_journal_get_write_access(handle, dir_bh);
1159 @@ -1089,7 +1856,7 @@
1160 new_inode->i_nlink--;
1163 - new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1164 + // EXT3_I(new_dir)->i_flags &= ~EXT3_INDEX_FL;
1165 ext3_mark_inode_dirty(handle, new_dir);
1168 --- ./include/linux/ext3_fs.h 2002/03/05 06:18:59 2.1
1169 +++ ./include/linux/ext3_fs.h 2002/03/05 06:26:56
1171 #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */
1172 #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
1173 #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
1174 +#define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */
1176 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1177 #ifndef _LINUX_EXT2_FS_H
1178 @@ -575,6 +576,24 @@
1179 #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
1180 #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
1183 + * Hash Tree Directory indexing
1184 + * (c) Daniel Phillips, 2001
1187 +#define CONFIG_EXT3_INDEX
1189 +#ifdef CONFIG_EXT3_INDEX
1190 + enum {ext3_dx = 1};
1191 + #define is_dx(dir) (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)
1192 +#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
1193 +#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
1195 + enum {ext3_dx = 0};
1196 + #define is_dx(dir) 0
1197 +#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
1198 +#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
1203 --- ./include/linux/ext3_jbd.h 2002/03/05 06:18:59 2.1
1204 +++ ./include/linux/ext3_jbd.h 2002/03/05 06:33:54
1207 #define EXT3_RESERVE_TRANS_BLOCKS 12
1209 +#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
1212 ext3_mark_iloc_dirty(handle_t *handle,
1213 struct inode *inode,