1 --- ./fs/ext3/dir.c 2002/03/05 06:18:59 2.1
2 +++ ./fs/ext3/dir.c 2002/03/05 06:26:56
4 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
7 -static int ext3_readdir(struct file *, void *, filldir_t);
8 +int ext3_readdir(struct file *, void *, filldir_t);
10 struct file_operations ext3_dir_operations = {
11 read: generic_read_dir,
13 return error_msg == NULL ? 1 : 0;
16 -static int ext3_readdir(struct file * filp,
17 +int ext3_readdir(struct file * filp,
18 void * dirent, filldir_t filldir)
21 --- ./fs/ext3/super.c 2002/03/05 06:18:59 2.1
22 +++ ./fs/ext3/super.c 2002/03/05 06:26:56
24 "EXT3 Check option not supported\n");
27 + else if (!strcmp (this_char, "index"))
28 +#ifdef CONFIG_EXT3_INDEX
29 + set_opt (*mount_options, INDEX);
31 + printk("EXT3 index option not supported\n");
33 else if (!strcmp (this_char, "debug"))
34 set_opt (*mount_options, DEBUG);
35 else if (!strcmp (this_char, "errors")) {
37 EXT3_BLOCKS_PER_GROUP(sb),
38 EXT3_INODES_PER_GROUP(sb),
41 + if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
42 + set_opt (EXT3_SB(sb)->s_mount_opt, INDEX);
44 printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ",
46 if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
47 --- ./fs/ext3/namei.c 2002/03/05 06:18:59 2.1
48 +++ ./fs/ext3/namei.c 2002/03/06 00:13:18
50 * David S. Miller (davem@caip.rutgers.edu), 1995
51 * Directory entry file type support and forward compatibility hooks
52 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
53 + * Hash Tree Directory indexing (c)
54 + * Daniel Phillips, 2001
55 + * Hash Tree Directory indexing porting
56 + * Christopher Li, 2002
61 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
62 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
64 +void ext3_add_compat_feature (struct super_block *sb, unsigned feature)
66 + if (!EXT3_HAS_COMPAT_FEATURE(sb, feature))
69 + ext3_update_dynamic_rev(sb);
70 + EXT3_SET_COMPAT_FEATURE(sb, feature);
71 + ext3_write_super(sb);
76 +static struct buffer_head *ext3_append(handle_t *handle,
77 + struct inode *inode,
78 + u32 *block, int *err)
80 + struct buffer_head *bh;
82 + *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
84 + if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
85 + inode->i_size += inode->i_sb->s_blocksize;
86 + EXT3_I(inode)->i_disksize = inode->i_size;
87 + ext3_journal_get_write_access(handle,bh);
93 +#define assert(test) J_ASSERT(test)
97 +#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
100 +typedef struct { u32 v; } le_u32;
101 +typedef struct { u16 v; } le_u16;
103 +#define dxtrace_on(command) command
104 +#define dxtrace_off(command)
105 +#define dxtrace dxtrace_off
115 +struct dx_countlimit
128 + * dx_root_info is laid out so that if it should somehow get overlaid by a
129 + * dirent the two low bits of the hash version will be zero. Therefore, the
130 + * hash version mod 4 should never be 0. Sincerely, the paranoia department.
135 + struct fake_dirent fake1;
137 + struct fake_dirent fake2;
139 + struct dx_root_info
141 + le_u32 reserved_zero;
142 + u8 hash_version; /* 0 now, 1 at release */
143 + u8 info_length; /* 8 */
144 + u8 indirect_levels;
148 + struct dx_entry entries[0];
153 + struct fake_dirent fake;
154 + struct dx_entry entries[0];
160 + struct buffer_head *bh;
161 + struct dx_entry *entries;
162 + struct dx_entry *at;
171 +typedef struct ext3_dir_entry_2 ext3_dirent;
172 +static inline unsigned dx_get_block (struct dx_entry *entry);
173 +static void dx_set_block (struct dx_entry *entry, unsigned value);
174 +static inline unsigned dx_get_hash (struct dx_entry *entry);
175 +static void dx_set_hash (struct dx_entry *entry, unsigned value);
176 +static unsigned dx_get_count (struct dx_entry *entries);
177 +static unsigned dx_get_limit (struct dx_entry *entries);
178 +static void dx_set_count (struct dx_entry *entries, unsigned value);
179 +static void dx_set_limit (struct dx_entry *entries, unsigned value);
180 +static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
181 +static unsigned dx_node_limit (struct inode *dir);
182 +static unsigned dx_hack_hash (const u8 *name, int len);
183 +static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame);
184 +static void dx_release (struct dx_frame *frames);
185 +static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]);
186 +static void dx_sort_map(struct dx_map_entry *map, unsigned count);
187 +static ext3_dirent *dx_copy_dirents (char *from, char *to,
188 + struct dx_map_entry *map, int count);
189 +static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
192 +#ifdef CONFIG_EXT3_INDEX
194 + * Future: use high four bits of block for coalesce-on-delete flags
195 + * Mask them off for now.
198 +static inline unsigned dx_get_block (struct dx_entry *entry)
200 + return le32_to_cpu(entry->block.v) & 0x0fffffff;
203 +static inline void dx_set_block (struct dx_entry *entry, unsigned value)
205 + entry->block.v = cpu_to_le32(value);
208 +static inline unsigned dx_get_hash (struct dx_entry *entry)
210 + return le32_to_cpu(entry->hash.v);
213 +static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
215 + entry->hash.v = cpu_to_le32(value);
218 +static inline unsigned dx_get_count (struct dx_entry *entries)
220 + return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
223 +static inline unsigned dx_get_limit (struct dx_entry *entries)
225 + return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
228 +static inline void dx_set_count (struct dx_entry *entries, unsigned value)
230 + ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
233 +static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
235 + ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
238 +static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
240 + unsigned entry_space = dir->i_sb->s_blocksize - 24 - infosize;
241 + return 0? 20: entry_space / sizeof(struct dx_entry);
244 +static inline unsigned dx_node_limit (struct inode *dir)
246 + unsigned entry_space = dir->i_sb->s_blocksize - sizeof(struct fake_dirent);
247 + return 0? 22: entry_space / sizeof(struct dx_entry);
250 +/* Hash function - not bad, but still looking for an ideal default */
252 +static unsigned dx_hack_hash (const u8 *name, int len)
254 + u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
257 + u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
258 + if (hash & 0x80000000) hash -= 0x7fffffff;
262 + return 80; /* FIXME: for test only */
266 +#define dx_hash(s,n) (dx_hack_hash(s,n) << 1)
271 +static void dx_show_index (char * label, struct dx_entry *entries)
273 + int i, n = dx_get_count (entries);
274 + printk("%s index ", label);
275 + for (i = 0; i < n; i++)
277 + printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
289 +static struct stats dx_show_leaf (ext3_dirent *de, int size, int show_names)
291 + unsigned names = 0, space = 0;
292 + char *base = (char *) de;
294 + while ((char *) de < base + size)
300 + int len = de->name_len;
301 + char *name = de->name;
302 + while (len--) printk("%c", *name++);
303 + printk(":%x.%u ", dx_hash (de->name, de->name_len), ((char *) de - base));
305 + space += EXT3_DIR_REC_LEN(de->name_len);
308 + de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
310 + printk("(%i)\n", names);
311 + return (struct stats) { names, space, 1 };
314 +struct stats dx_show_entries (struct inode *dir, struct dx_entry *entries, int levels)
316 + unsigned blocksize = dir->i_sb->s_blocksize;
317 + unsigned count = dx_get_count (entries), names = 0, space = 0, i;
318 + unsigned bcount = 0;
319 + struct buffer_head *bh;
321 + printk("%i indexed blocks...\n", count);
322 + for (i = 0; i < count; i++, entries++)
324 + u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
325 + u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
326 + struct stats stats;
327 + printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
328 + if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
330 + dx_show_entries (dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
331 + dx_show_leaf ((ext3_dirent *) bh->b_data, blocksize, 0);
332 + names += stats.names;
333 + space += stats.space;
334 + bcount += stats.bcount;
338 + printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
339 + names, space/bcount,(space/bcount)*100/blocksize);
340 + return (struct stats) { names, space, bcount};
343 +static void dx_show_buckets (struct inode *dir)
345 + struct buffer_head *bh;
346 + struct dx_root *root;
348 + if (!(bh = ext3_bread (NULL,dir, 0, 0,&err))) return;
349 + root = (struct dx_root *) bh->b_data;
350 + dx_show_entries (dir, root->entries, root->info.indirect_levels);
354 +ssize_t hack_show_dir (struct file * filp, void * dirent, filldir_t filldir)
356 + if (is_dx (filp->f_dentry->d_inode) && !filp->f_pos)
357 + dx_show_buckets (filp->f_dentry->d_inode);
358 + return ext3_readdir(filp,dirent,filldir);
362 + * Probe for a directory leaf block to search
365 +static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame)
367 + unsigned count, indirect;
368 + struct dx_entry *at, *entries, *p, *q, *m;
369 + struct dx_root *root;
370 + struct buffer_head *bh;
372 + if (!(bh = ext3_bread (NULL,dir, 0, 0,&err)))
374 + root = (struct dx_root *) bh->b_data;
375 + if (root->info.hash_version > 0 || root->info.unused_flags & 1)
377 + if ((indirect = root->info.indirect_levels) > 1)
379 + entries = (struct dx_entry *) (((char *) &root->info) + root->info.info_length);
380 + assert (dx_get_limit(entries) == dx_root_limit(dir, root->info.info_length));
381 + dxtrace (printk("Look up %x", hash));
384 + count = dx_get_count(entries);
385 + assert (count && count <= dx_get_limit(entries));
387 + q = entries + count - 1;
391 + dxtrace(printk("."));
392 + if (dx_get_hash(m) > hash)
398 + if (0) // linear search cross check
400 + unsigned n = count - 1;
404 + dxtrace(printk(","));
405 + if (dx_get_hash(++at) > hash)
411 + assert (at == p - 1);
415 + dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
417 + frame->entries = entries;
419 + if (!indirect--) return frame;
420 + if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0,&err)))
422 + at = entries = ((struct dx_node *) bh->b_data)->entries;
423 + assert (dx_get_limit(entries) == dx_node_limit (dir));
432 +static void dx_release (struct dx_frame *frames)
434 + if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
435 + brelse (frames[1].bh);
436 + brelse (frames[0].bh);
440 + * Directory block splitting, compacting
443 +static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[])
446 + char *base = (char *) de;
447 + while ((char *) de < base + size)
449 + map[count].hash = dx_hash (de->name, de->name_len);
450 + map[count].offs = (u32) ((char *) de - base);
451 + de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len));
457 +static void dx_sort_map (struct dx_map_entry *map, unsigned count)
459 + struct dx_map_entry *p, *q, *top = map + count - 1;
461 + /* Combsort until bubble sort doesn't suck */
464 + count = count*10/13;
465 + if (count - 9 < 2) /* 9, 10 -> 11 */
467 + for (p = top, q = p - count; q >= map; p--, q--)
468 + if (p->hash < q->hash)
471 + /* Garden variety bubble sort */
477 + if (q[1].hash >= q[0].hash)
485 +static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block)
487 + struct dx_entry *entries = frame->entries, *at = frame->at;
488 + assert (dx_get_count (entries) < dx_get_limit (entries) &&
489 + frame->at < frame->entries+dx_get_count(entries));
490 + memmove (at + 2, at+1, (char *) (entries + dx_get_count(entries)) - (char *) (at));
491 + dx_set_hash(at + 1, hash);
492 + dx_set_block(at + 1, block);
493 + dx_set_count(entries, dx_get_count(entries) + 1);
498 * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
504 + * p is at least 6 bytes before the end of page
506 +static inline ext3_dirent *ext3_next_entry(ext3_dirent *p)
508 + return (ext3_dirent *)((char*)p + le16_to_cpu(p->rec_len));
515 * finds an entry in the specified directory with the wanted name. It
517 * The returned buffer_head has ->b_count elevated. The caller is expected
518 * to brelse() it when appropriate.
522 static struct buffer_head * ext3_find_entry (struct dentry *dentry,
523 struct ext3_dir_entry_2 ** res_dir)
525 @@ -119,10 +564,76 @@
528 struct inode *dir = dentry->d_parent->d_inode;
531 + unsigned blocksize;
532 + ext3_dirent *de, *top;
536 + blocksize = sb->s_blocksize;
537 + namelen = dentry->d_name.len;
538 + name = dentry->d_name.name;
539 + if (namelen > EXT3_NAME_LEN)
541 + if (ext3_dx && is_dx(dir)) {
542 + u32 hash = dx_hash (name, namelen);
543 + struct dx_frame frames[2], *frame;
544 + if (!(frame = dx_probe (dir, hash, frames)))
547 + block = dx_get_block(frame->at);
548 + if (!(bh = ext3_bread (NULL,dir, block, 0, &err)))
550 + de = (ext3_dirent *) bh->b_data;
551 + top = (ext3_dirent *) ((char *) de + blocksize -
552 + EXT3_DIR_REC_LEN(0));
553 + for (; de < top; de = ext3_next_entry(de))
554 + if (ext3_match (namelen, name, de)) {
555 + if (!ext3_check_dir_entry("ext3_find_entry",
557 + (block<<EXT3_BLOCK_SIZE_BITS(sb))
558 + +((char *)de - bh->b_data))) {
566 + /* Same hash continues in next block? Search on. */
567 + if (++(frame->at) == frame->entries + dx_get_count(frame->entries))
569 + struct buffer_head *bh2;
570 + if (frame == frames)
572 + if (++(frames->at) == frames->entries + dx_get_count(frames->entries))
574 + /* should omit read if not continued */
575 + if (!(bh2 = ext3_bread (NULL, dir,
576 + dx_get_block(frames->at),
579 + brelse (frame->bh);
581 + frame->at = frame->entries = ((struct dx_node *) bh2->b_data)->entries;
582 + /* Subtle: the 0th entry has the count, find the hash in frame above */
583 + if ((dx_get_hash(frames->at) & -2) == hash)
587 + if ((dx_get_hash(frame->at) & -2) == hash)
590 + dxtrace(printk("%s not found\n", name));
591 + dx_release (frames);
594 + dx_release (frames);
599 nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
600 start = dir->u.ext3_i.i_dir_start_lookup;
601 if (start >= nblocks)
603 de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
606 +static ext3_dirent *
607 +dx_copy_dirents (char *from, char *to, struct dx_map_entry *map, int count)
609 + unsigned rec_len = 0;
612 + ext3_dirent *de = (ext3_dirent *) (from + map->offs);
613 + rec_len = EXT3_DIR_REC_LEN(de->name_len);
614 + memcpy (to, de, rec_len);
615 + ((ext3_dirent *) to)->rec_len = rec_len;
619 + return (ext3_dirent *) (to - rec_len);
622 +#ifdef CONFIG_EXT3_INDEX
623 +static ext3_dirent *do_split(handle_t *handle, struct inode *dir,
624 + struct buffer_head **bh,struct dx_frame *frame,
625 + u32 hash, int *error)
627 + unsigned blocksize = dir->i_sb->s_blocksize;
628 + unsigned count, continued;
629 + struct buffer_head *bh2;
631 + unsigned MAX_DX_MAP = PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1;
633 + struct dx_map_entry map[MAX_DX_MAP];
634 + char *data1 = (*bh)->b_data, *data2, *data3;
636 + ext3_dirent *de, *de2;
638 + bh2 = ext3_append (handle, dir, &newblock, error);
643 + return (ext3_dirent *)bh2;
646 + BUFFER_TRACE(*bh, "get_write_access");
647 + ext3_journal_get_write_access(handle, *bh);
648 + BUFFER_TRACE(frame->bh, "get_write_access");
649 + ext3_journal_get_write_access(handle, frame->bh);
651 + data2 = bh2->b_data;
653 + count = dx_make_map ((ext3_dirent *) data1, blocksize, map);
654 + split = count/2; // need to adjust to actual middle
655 + dx_sort_map (map, count);
656 + hash2 = map[split].hash;
657 + continued = hash2 == map[split - 1].hash;
658 + dxtrace(printk("Split block %i at %x, %i/%i\n",
659 + dx_get_block(frame->at), hash2, split, count-split));
661 + /* Fancy dance to stay within two buffers */
662 + de2 = dx_copy_dirents (data1, data2, map + split, count - split);
663 + data3 = (char *) de2 + de2->rec_len;
664 + de = dx_copy_dirents (data1, data3, map, split);
665 + memcpy(data1, data3, (char *) de + de->rec_len - data3);
666 + de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
667 + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
668 + de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
669 + dxtrace(dx_show_leaf ((ext3_dirent *) data1, blocksize, 1));
670 + dxtrace(dx_show_leaf ((ext3_dirent *) data2, blocksize, 1));
672 + /* Which block gets the new entry? */
678 + dx_insert_block (frame, hash2 + continued, newblock);
679 + ext3_journal_dirty_metadata (handle, bh2);
681 + ext3_journal_dirty_metadata (handle, frame->bh);
682 + dxtrace(dx_show_index ("frame", frame->entries));
693 * AKPM: the journalling code here looks wrong on the error paths
696 static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
699 @@ -258,117 +852,282 @@
700 const char *name = dentry->d_name.name;
701 int namelen = dentry->d_name.len;
702 unsigned long offset;
703 - unsigned short rec_len;
704 struct buffer_head * bh;
705 - struct ext3_dir_entry_2 * de, * de1;
706 - struct super_block * sb;
708 + struct super_block * sb = dir->i_sb;
710 + unsigned short reclen = EXT3_DIR_REC_LEN(namelen);
713 + unsigned blocksize = sb->s_blocksize;
714 + unsigned nlen, rlen;
720 - bh = ext3_bread (handle, dir, 0, 0, &retval);
723 - rec_len = EXT3_DIR_REC_LEN(namelen);
725 - de = (struct ext3_dir_entry_2 *) bh->b_data;
727 - if ((char *)de >= sb->s_blocksize + bh->b_data) {
730 - bh = ext3_bread (handle, dir,
731 - offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
734 - if (dir->i_size <= offset) {
735 - if (dir->i_size == 0) {
738 + if (ext3_dx && is_dx(dir))
740 + struct dx_frame frames[2], *frame;
741 + struct dx_entry *entries, *at;
745 + hash = dx_hash (name, namelen);
746 + frame = dx_probe (dir, hash, frames); // do something if null
747 + entries = frame->entries;
750 + if (!(bh = ext3_bread (handle,dir, dx_get_block(frame->at), 0,&retval)))
753 + BUFFER_TRACE(bh, "get_write_access");
754 + ext3_journal_get_write_access(handle, bh);
756 + data1 = bh->b_data;
757 + de = (ext3_dirent *) data1;
758 + top = data1 + (0? 200: blocksize);
759 + while ((char *) de < top)
761 + /* FIXME: check EEXIST and dir */
762 + nlen = EXT3_DIR_REC_LEN(de->name_len);
763 + rlen = le16_to_cpu(de->rec_len);
764 + if ((de->inode? rlen - nlen: rlen) >= reclen)
766 + de = (ext3_dirent *) ((char *) de + rlen);
768 + /* Block full, should compress but for now just split */
769 + dxtrace(printk("using %u of %u node entries\n",
770 + dx_get_count(entries), dx_get_limit(entries)));
771 + /* Need to split index? */
772 + if (dx_get_count(entries) == dx_get_limit(entries))
775 + unsigned icount = dx_get_count(entries);
776 + int levels = frame - frames;
777 + struct dx_entry *entries2;
778 + struct dx_node *node2;
779 + struct buffer_head *bh2;
780 + if (levels && dx_get_count(frames->entries) == dx_get_limit(frames->entries))
782 + bh2 = ext3_append (handle, dir, &newblock, &retval);
785 + node2 = (struct dx_node *)(bh2->b_data);
786 + entries2 = node2->entries;
787 + node2->fake.rec_len = cpu_to_le16(blocksize);
788 + node2->fake.inode = 0;
789 + BUFFER_TRACE(frame->bh, "get_write_access");
790 + ext3_journal_get_write_access(handle, frame->bh);
793 + unsigned icount1 = icount/2, icount2 = icount - icount1;
794 + unsigned hash2 = dx_get_hash(entries + icount1);
795 + dxtrace(printk("Split index %i/%i\n", icount1, icount2));
797 + BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
798 + ext3_journal_get_write_access(handle, frames[0].bh);
800 + memcpy ((char *) entries2, (char *) (entries + icount1),
801 + icount2 * sizeof(struct dx_entry));
802 + dx_set_count (entries, icount1);
803 + dx_set_count (entries2, icount2);
804 + dx_set_limit (entries2, dx_node_limit(dir));
806 + /* Which index block gets the new entry? */
807 + if (at - entries >= icount1) {
808 + frame->at = at = at - entries - icount1 + entries2;
809 + frame->entries = entries = entries2;
810 + swap(frame->bh, bh2);
813 - ext3_debug ("creating next block\n");
815 - BUFFER_TRACE(bh, "get_write_access");
816 - ext3_journal_get_write_access(handle, bh);
817 - de = (struct ext3_dir_entry_2 *) bh->b_data;
819 - de->rec_len = le16_to_cpu(sb->s_blocksize);
820 - dir->u.ext3_i.i_disksize =
821 - dir->i_size = offset + sb->s_blocksize;
822 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
823 - ext3_mark_inode_dirty(handle, dir);
824 + dx_insert_block (frames + 0, hash2, newblock);
825 + dxtrace(dx_show_index ("node", frames[1].entries));
826 + dxtrace(dx_show_index ("node",
827 + ((struct dx_node *) bh2->b_data)->entries));
828 + ext3_journal_dirty_metadata(handle, bh2);
832 - ext3_debug ("skipping to next block\n");
834 - de = (struct ext3_dir_entry_2 *) bh->b_data;
835 + dxtrace(printk("Creating second level index...\n"));
836 + memcpy((char *) entries2, (char *) entries,
837 + icount * sizeof(struct dx_entry));
838 + dx_set_limit(entries2, dx_node_limit(dir));
841 + dx_set_count(entries, 1);
842 + dx_set_block(entries + 0, newblock);
843 + ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
845 + /* Add new access path frame */
846 + frame = frames + 1;
847 + frame->at = at = at - entries + entries2;
848 + frame->entries = entries = entries2;
850 + ext3_journal_get_write_access(handle, frame->bh);
852 + ext3_journal_dirty_metadata(handle, frames[0].bh);
854 - if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
859 - if (ext3_match (namelen, name, de)) {
860 + de = do_split(handle, dir, &bh, frame, hash, &retval);
861 + dx_release (frames);
864 + nlen = EXT3_DIR_REC_LEN(de->name_len);
865 + rlen = le16_to_cpu(de->rec_len);
869 + dx_release (frames);
873 + ext3_warning(sb, __FUNCTION__, "Directory index full!\n");
878 + dx_release (frames);
882 + blocks = dir->i_size >> sb->s_blocksize_bits;
883 + for (block = 0, offset = 0; block < blocks; block++) {
884 + bh = ext3_bread(handle, dir, block, 0, &retval);
887 + de = (ext3_dirent *)bh->b_data;
888 + top = bh->b_data + blocksize - reclen;
889 + while ((char *) de <= top) {
890 + if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
895 + if (ext3_match (namelen, name, de)) {
899 - if ((le32_to_cpu(de->inode) == 0 &&
900 - le16_to_cpu(de->rec_len) >= rec_len) ||
901 - (le16_to_cpu(de->rec_len) >=
902 - EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
903 - BUFFER_TRACE(bh, "get_write_access");
904 - ext3_journal_get_write_access(handle, bh);
905 - /* By now the buffer is marked for journaling */
906 - offset += le16_to_cpu(de->rec_len);
907 - if (le32_to_cpu(de->inode)) {
908 - de1 = (struct ext3_dir_entry_2 *) ((char *) de +
909 - EXT3_DIR_REC_LEN(de->name_len));
911 - cpu_to_le16(le16_to_cpu(de->rec_len) -
912 - EXT3_DIR_REC_LEN(de->name_len));
913 - de->rec_len = cpu_to_le16(
914 - EXT3_DIR_REC_LEN(de->name_len));
917 - de->file_type = EXT3_FT_UNKNOWN;
919 - de->inode = cpu_to_le32(inode->i_ino);
920 - ext3_set_de_type(dir->i_sb, de, inode->i_mode);
923 - de->name_len = namelen;
924 - memcpy (de->name, name, namelen);
926 - * XXX shouldn't update any times until successful
927 - * completion of syscall, but too many callers depend
930 - * XXX similarly, too many callers depend on
931 - * ext3_new_inode() setting the times, but error
932 - * recovery deletes the inode, so the worst that can
933 - * happen is that the times are slightly out of date
934 - * and/or different from the directory change time.
936 - dir->i_mtime = dir->i_ctime = CURRENT_TIME;
937 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
938 - ext3_mark_inode_dirty(handle, dir);
939 - dir->i_version = ++event;
940 - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
941 - ext3_journal_dirty_metadata(handle, bh);
942 + nlen = EXT3_DIR_REC_LEN(de->name_len);
943 + rlen = le16_to_cpu(de->rec_len);
944 + if ((de->inode? rlen - nlen: rlen) >= reclen)
946 + de = (ext3_dirent *)((char *)de + rlen);
949 + if (ext3_dx && blocks == 1 && test_opt(sb, INDEX))
950 + goto dx_make_index;
953 + bh = ext3_append(handle, dir, &block, &retval);
956 + de = (ext3_dirent *) bh->b_data;
958 + de->rec_len = cpu_to_le16(rlen = blocksize);
963 + BUFFER_TRACE(bh, "get_write_access");
964 + ext3_journal_get_write_access(handle, bh);
965 + /* By now the buffer is marked for journaling */
967 + ext3_dirent *de1 = (ext3_dirent *)((char *)de + nlen);
968 + de1->rec_len = cpu_to_le16(rlen - nlen);
969 + de->rec_len = cpu_to_le16(nlen);
972 + de->file_type = EXT3_FT_UNKNOWN;
974 + de->inode = cpu_to_le32(inode->i_ino);
975 + ext3_set_de_type(dir->i_sb, de, inode->i_mode);
978 + de->name_len = namelen;
979 + memcpy (de->name, name, namelen);
981 + * XXX shouldn't update any times until successful
982 + * completion of syscall, but too many callers depend
985 + * XXX similarly, too many callers depend on
986 + * ext3_new_inode() setting the times, but error
987 + * recovery deletes the inode, so the worst that can
988 + * happen is that the times are slightly out of date
989 + * and/or different from the directory change time.
991 + dir->i_mtime = dir->i_ctime = CURRENT_TIME;
992 + /* EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; */
993 + ext3_mark_inode_dirty(handle, dir);
994 + dir->i_version = ++event;
995 + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
996 + ext3_journal_dirty_metadata(handle, bh);
1002 + struct buffer_head *bh2;
1003 + struct dx_root *root;
1004 + struct dx_frame frames[2], *frame;
1005 + struct dx_entry *entries;
1011 + dxtrace(printk("Creating index\n"));
1012 + ext3_journal_get_write_access(handle, bh);
1013 + root = (struct dx_root *) bh->b_data;
1015 + ext3_add_compat_feature (sb, EXT3_FEATURE_COMPAT_DIR_INDEX);
1016 + EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
1017 + bh2 = ext3_append (handle, dir, &block, &retval);
1024 - offset += le16_to_cpu(de->rec_len);
1025 - de = (struct ext3_dir_entry_2 *)
1026 - ((char *) de + le16_to_cpu(de->rec_len));
1027 + data1 = bh2->b_data;
1029 + /* The 0th block becomes the root, move the dirents out */
1030 + de = (ext3_dirent *) &root->info;
1031 + len = ((char *) root) + blocksize - (char *) de;
1032 + memcpy (data1, de, len);
1033 + de = (ext3_dirent *) data1;
1034 + top = data1 + len;
1035 + while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top)
1037 + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
1038 + /* Initialize the root; the dot dirents already exist */
1039 + de = (ext3_dirent *) (&root->fake2);
1040 + de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
1041 + memset (&root->info, 0, sizeof(root->info));
1042 + root->info.info_length = sizeof(root->info);
1043 + entries = root->entries;
1044 + dx_set_block (entries, 1);
1045 + dx_set_count (entries, 1);
1046 + dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
1048 + /* Initialize as for dx_probe */
1049 + hash = dx_hash (name, namelen);
1051 + frame->entries = entries;
1052 + frame->at = entries;
1055 + de = do_split(handle,dir, &bh, frame, hash, &retval);
1056 + dx_release (frames);
1059 + nlen = EXT3_DIR_REC_LEN(de->name_len);
1060 + rlen = le16_to_cpu(de->rec_len);
1073 * ext3_delete_entry deletes a directory entry by merging it with the
1075 @@ -451,7 +1212,8 @@
1076 struct inode * inode;
1079 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1080 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1081 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1083 return PTR_ERR(handle);
1085 @@ -478,7 +1240,8 @@
1086 struct inode *inode;
1089 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1090 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1091 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1093 return PTR_ERR(handle);
1095 @@ -507,7 +1270,8 @@
1096 if (dir->i_nlink >= EXT3_LINK_MAX)
1099 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
1100 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1101 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
1103 return PTR_ERR(handle);
1105 @@ -832,7 +1596,7 @@
1106 ext3_mark_inode_dirty(handle, inode);
1108 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1109 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1110 + // EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
1111 ext3_mark_inode_dirty(handle, dir);
1114 @@ -878,7 +1642,7 @@
1117 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1118 - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1119 + // EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
1120 ext3_mark_inode_dirty(handle, dir);
1122 if (!inode->i_nlink)
1123 @@ -904,7 +1668,8 @@
1124 if (l > dir->i_sb->s_blocksize)
1125 return -ENAMETOOLONG;
1127 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
1128 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1129 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
1131 return PTR_ERR(handle);
1133 @@ -959,7 +1724,8 @@
1134 if (inode->i_nlink >= EXT3_LINK_MAX)
1137 - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
1138 + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
1139 + EXT3_INDEX_EXTRA_TRANS_BLOCKS);
1141 return PTR_ERR(handle);
1143 @@ -995,7 +1761,8 @@
1145 old_bh = new_bh = dir_bh = NULL;
1147 - handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
1148 + handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
1149 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
1151 return PTR_ERR(handle);
1153 @@ -1077,7 +1844,7 @@
1154 new_inode->i_ctime = CURRENT_TIME;
1156 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1157 - old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1158 + // EXT3_I(old_dir)->i_flags &= ~EXT3_INDEX_FL;
1160 BUFFER_TRACE(dir_bh, "get_write_access");
1161 ext3_journal_get_write_access(handle, dir_bh);
1162 @@ -1089,7 +1856,7 @@
1163 new_inode->i_nlink--;
1166 - new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1167 + // EXT3_I(new_dir)->i_flags &= ~EXT3_INDEX_FL;
1168 ext3_mark_inode_dirty(handle, new_dir);
1171 --- ./include/linux/ext3_fs.h 2002/03/05 06:18:59 2.1
1172 +++ ./include/linux/ext3_fs.h 2002/03/05 06:26:56
1174 #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */
1175 #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
1176 #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
1177 +#define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */
1179 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1180 #ifndef _LINUX_EXT2_FS_H
1181 @@ -575,6 +576,24 @@
1182 #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
1183 #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
1186 + * Hash Tree Directory indexing
1187 + * (c) Daniel Phillips, 2001
1190 +#define CONFIG_EXT3_INDEX
1192 +#ifdef CONFIG_EXT3_INDEX
1193 + enum {ext3_dx = 1};
1194 + #define is_dx(dir) (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)
1195 +#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
1196 +#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
1198 + enum {ext3_dx = 0};
1199 + #define is_dx(dir) 0
1200 +#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
1201 +#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
1206 --- ./include/linux/ext3_jbd.h 2002/03/05 06:18:59 2.1
1207 +++ ./include/linux/ext3_jbd.h 2002/03/05 06:33:54
1210 #define EXT3_RESERVE_TRANS_BLOCKS 12
1212 +#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
1215 ext3_mark_iloc_dirty(handle_t *handle,
1216 struct inode *inode,